gem5/splash2/codes/apps/ocean/non_contiguous_partitions/slave2.C
Sanchayan Maity 0f4b39775c Fix splash2 benchmark
During the last commit of the splash2 benchmark, it seems that running
"make clean" before committing effectively undid what the patch at the
link below did:
http://www.capsl.udel.edu/splash/Download.html

Fix this since without this it is not possible to build the arcane
splash2 benchmark.
2017-04-26 21:33:02 +05:30

1006 lines
32 KiB
C

/*************************************************************************/
/* */
/* Copyright (c) 1994 Stanford University */
/* */
/* All rights reserved. */
/* */
/* Permission is given to use, copy, and modify this software for any */
/* non-commercial purpose as long as this copyright notice is not */
/* removed. All other uses, including redistribution in whole or in */
/* part, are forbidden without prior written permission. */
/* */
/* This software is provided with absolutely no warranty and no */
/* support. */
/* */
/*************************************************************************/
/* ****************
subroutine slave2
**************** */
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <stdlib.h>
#include "decs.h"
/*
 * slave2 - per-process body of one time-step, split into ten phases that
 * are separated by barriers across all nprocs processes.
 *
 * Every phase follows the same ownership pattern:
 *   - the process with procid == MASTER          handles corner [0][0]
 *   - the process with procid == nprocs-xprocs   handles corner [im-1][0]
 *   - the process with procid == xprocs-1        handles corner [0][jm-1]
 *   - the process with procid == nprocs-1        handles corner [im-1][jm-1]
 *   - a process with firstrow == 1               handles boundary row 0
 *   - a process with firstrow+numrows == im-1    handles boundary row im-1
 *   - a process with firstcol == 1               handles boundary column 0
 *   - a process with firstcol+numcols == jm-1    handles boundary column jm-1
 *   - every process handles rows firstrow..lastrow, columns firstcol..lastcol
 *
 * Parameters:
 *   procid             id of the calling process (indexes gp[], selects corners)
 *   firstrow, lastrow  inclusive row range of this process's interior points
 *   numrows            row count, used to detect the last-row partition and
 *                      forwarded to laplacalc()/jacobcalc()
 *   firstcol, lastcol  inclusive column range of this process's interior points
 *   numcols            column count, used to detect the last-column partition
 *                      and forwarded to laplacalc()/jacobcalc()
 *
 * Side effects: updates the shared arrays reached through wrk1..wrk6,
 * fields, multi, guess, the scalar global->psiai and gp[procid].multi_time.
 *
 * Changes relative to the previous revision:
 *   - phase five, boundary row 0: the second work7 term of ga now reads
 *     layer 1 (it used to subtract layer 0 from itself, unlike every other
 *     ga expression in that phase);
 *   - the local "fac" was assigned but never read and has been removed.
 */
void slave2(long procid, long firstrow, long lastrow, long numrows, long firstcol, long lastcol, long numcols)
{
  long i;
  long j;
  long iindex;
  double hh1;
  double hh3;
  double hinv;
  double h1inv;
  long istart;
  long iend;
  long jstart;
  long jend;
  long ist;
  long ien;
  long jst;
  long jen;
  double ressqr;
  double timst;
  double f4;
  long psiindex;
  double psiaipriv;
  long multi_start;
  long multi_end;

  /* square of the resolution entry for level numlev-1 */
  ressqr = lev_res[numlev-1] * lev_res[numlev-1];

  /* ***************************************************************
     f i r s t   p h a s e   (of timestep calculation)
     ***************************************************************/

  /* clear the ga array: corners, boundary rows/columns, then interior */
  if (procid == MASTER) {
    wrk1->ga[0][0]=0.0;
  }
  if (procid == nprocs-xprocs) {
    wrk1->ga[im-1][0]=0.0;
  }
  if (procid == xprocs-1) {
    wrk1->ga[0][jm-1]=0.0;
  }
  if (procid == nprocs-1) {
    wrk1->ga[im-1][jm-1]=0.0;
  }
  if (firstrow == 1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk1->ga[0][j] = 0.0;
    }
  }
  if ((firstrow+numrows) == im-1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk1->ga[im-1][j] = 0.0;
    }
  }
  if (firstcol == 1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk1->ga[j][0] = 0.0;
    }
  }
  if ((firstcol+numcols) == jm-1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk1->ga[j][jm-1] = 0.0;
    }
  }
  for(i=firstrow;i<=lastrow;i++) {
    for(iindex=firstcol;iindex<=lastcol;iindex++) {
      wrk1->ga[i][iindex] = 0.0;
    }
  }

  /* clear the gb array in the same way */
  if (procid == MASTER) {
    wrk1->gb[0][0]=0.0;
  }
  if (procid == nprocs-xprocs) {
    wrk1->gb[im-1][0]=0.0;
  }
  if (procid == xprocs-1) {
    wrk1->gb[0][jm-1]=0.0;
  }
  if (procid == nprocs-1) {
    wrk1->gb[im-1][jm-1]=0.0;
  }
  if (firstrow == 1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk1->gb[0][j] = 0.0;
    }
  }
  if ((firstrow+numrows) == im-1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk1->gb[im-1][j] = 0.0;
    }
  }
  if (firstcol == 1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk1->gb[j][0] = 0.0;
    }
  }
  if ((firstcol+numcols) == jm-1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk1->gb[j][jm-1] = 0.0;
    }
  }
  for(i=firstrow;i<=lastrow;i++) {
    for(iindex=firstcol;iindex<=lastcol;iindex++) {
      wrk1->gb[i][iindex] = 0.0;
    }
  }

  /* put the laplacian of psi{1,3} in work1{1,2}
     note that psi(i,j,2) represents the psi3 array in
     the original equations; the four corners of work1 are
     zeroed here, everything else is left to laplacalc() */
  for(psiindex=0;psiindex<=1;psiindex++) {
    if (procid == MASTER) {
      wrk3->work1[psiindex][0][0] = 0;
    }
    if (procid == nprocs-xprocs) {
      wrk3->work1[psiindex][im-1][0] = 0;
    }
    if (procid == xprocs-1) {
      wrk3->work1[psiindex][0][jm-1] = 0;
    }
    if (procid == nprocs-1) {
      wrk3->work1[psiindex][im-1][jm-1] = 0;
    }
    laplacalc(fields->psi[psiindex],
              wrk3->work1[psiindex],
              firstrow,lastrow,firstcol,lastcol,numrows,numcols);
  }

  /* set values of work2 array to psi1 - psi3 */
  if (procid == MASTER) {
    wrk3->work2[0][0] = fields->psi[0][0][0]-fields->psi[1][0][0];
  }
  if (procid == nprocs-xprocs) {
    wrk3->work2[im-1][0] = fields->psi[0][im-1][0]-fields->psi[1][im-1][0];
  }
  if (procid == xprocs-1) {
    wrk3->work2[0][jm-1] = fields->psi[0][0][jm-1]-fields->psi[1][0][jm-1];
  }
  if (procid == nprocs-1) {
    wrk3->work2[im-1][jm-1] = fields->psi[0][im-1][jm-1]-fields->psi[1][im-1][jm-1];
  }
  if (firstrow == 1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk3->work2[0][j] = fields->psi[0][0][j]-fields->psi[1][0][j];
    }
  }
  if ((firstrow+numrows) == im-1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk3->work2[im-1][j] = fields->psi[0][im-1][j]-fields->psi[1][im-1][j];
    }
  }
  if (firstcol == 1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk3->work2[j][0] = fields->psi[0][j][0]-fields->psi[1][j][0];
    }
  }
  if ((firstcol+numcols) == jm-1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk3->work2[j][jm-1] = fields->psi[0][j][jm-1]-fields->psi[1][j][jm-1];
    }
  }
  for(i=firstrow;i<=lastrow;i++) {
    for(iindex=firstcol;iindex<=lastcol;iindex++) {
      wrk3->work2[i][iindex] = fields->psi[0][i][iindex]-fields->psi[1][i][iindex];
    }
  }

  /* set values of work3 array to h3/h * psi1 + h1/h * psi3 */
  hh3 = h3/h;
  hh1 = h1/h;
  if (procid == MASTER) {
    wrk2->work3[0][0] = hh3*fields->psi[0][0][0]+hh1*fields->psi[1][0][0];
  }
  if (procid == nprocs-xprocs) {
    wrk2->work3[im-1][0] = hh3*fields->psi[0][im-1][0]+hh1*fields->psi[1][im-1][0];
  }
  if (procid == xprocs-1) {
    wrk2->work3[0][jm-1] = hh3*fields->psi[0][0][jm-1]+hh1*fields->psi[1][0][jm-1];
  }
  if (procid == nprocs-1) {
    wrk2->work3[im-1][jm-1] = hh3*fields->psi[0][im-1][jm-1]+hh1*fields->psi[1][im-1][jm-1];
  }
  if (firstrow == 1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk2->work3[0][j] = hh3*fields->psi[0][0][j]+hh1*fields->psi[1][0][j];
    }
  }
  if ((firstrow+numrows) == im-1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk2->work3[im-1][j] = hh3*fields->psi[0][im-1][j]+hh1*fields->psi[1][im-1][j];
    }
  }
  if (firstcol == 1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk2->work3[j][0] = hh3*fields->psi[0][j][0]+hh1*fields->psi[1][j][0];
    }
  }
  if ((firstcol+numcols) == jm-1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk2->work3[j][jm-1] = hh3*fields->psi[0][j][jm-1]+hh1*fields->psi[1][j][jm-1];
    }
  }
  for(i=firstrow;i<=lastrow;i++) {
    for(iindex=firstcol;iindex<=lastcol;iindex++) {
      wrk2->work3[i][iindex] = hh3*fields->psi[0][i][iindex]+hh1*fields->psi[1][i][iindex];
    }
  }

  /* save the current psi{1,3} in temparray{1,3}; psi is overwritten in
     the second phase and temparray is copied into psim in the third */
  for(psiindex=0;psiindex<=1;psiindex++) {
    if (procid == MASTER) {
      wrk5->temparray[psiindex][0][0] = fields->psi[psiindex][0][0];
    }
    if (procid == nprocs-xprocs) {
      wrk5->temparray[psiindex][im-1][0] = fields->psi[psiindex][im-1][0];
    }
    if (procid == xprocs-1) {
      wrk5->temparray[psiindex][0][jm-1] = fields->psi[psiindex][0][jm-1];
    }
    if (procid == nprocs-1) {
      wrk5->temparray[psiindex][im-1][jm-1] = fields->psi[psiindex][im-1][jm-1];
    }
    if (firstrow == 1) {
      for(j=firstcol;j<=lastcol;j++) {
        wrk5->temparray[psiindex][0][j] = fields->psi[psiindex][0][j];
      }
    }
    if ((firstrow+numrows) == im-1) {
      for(j=firstcol;j<=lastcol;j++) {
        wrk5->temparray[psiindex][im-1][j] = fields->psi[psiindex][im-1][j];
      }
    }
    if (firstcol == 1) {
      for(j=firstrow;j<=lastrow;j++) {
        wrk5->temparray[psiindex][j][0] = fields->psi[psiindex][j][0];
      }
    }
    if ((firstcol+numcols) == jm-1) {
      for(j=firstrow;j<=lastrow;j++) {
        wrk5->temparray[psiindex][j][jm-1] = fields->psi[psiindex][j][jm-1];
      }
    }
    for(i=firstrow;i<=lastrow;i++) {
      for(iindex=firstcol;iindex<=lastcol;iindex++) {
        wrk5->temparray[psiindex][i][iindex] = fields->psi[psiindex][i][iindex];
      }
    }
  }
#if defined(MULTIPLE_BARRIERS)
  BARRIER(bars->sl_phase_1,nprocs)
#else
  BARRIER(bars->barrier,nprocs)
#endif

  /* *******************************************************
     s e c o n d   p h a s e
     *******************************************************
     set values of psi{1,3} to psim{1,3} */
  for(psiindex=0;psiindex<=1;psiindex++) {
    if (procid == MASTER) {
      fields->psi[psiindex][0][0] = fields->psim[psiindex][0][0];
    }
    if (procid == xprocs-1) {
      fields->psi[psiindex][0][jm-1] = fields->psim[psiindex][0][jm-1];
    }
    if (procid == nprocs-xprocs) {
      fields->psi[psiindex][im-1][0] = fields->psim[psiindex][im-1][0];
    }
    if (procid == nprocs-1) {
      fields->psi[psiindex][im-1][jm-1] = fields->psim[psiindex][im-1][jm-1];
    }
    if (firstrow == 1) {
      for(j=firstcol;j<=lastcol;j++) {
        fields->psi[psiindex][0][j] = fields->psim[psiindex][0][j];
      }
    }
    if ((firstrow+numrows) == im-1) {
      for(j=firstcol;j<=lastcol;j++) {
        fields->psi[psiindex][im-1][j] = fields->psim[psiindex][im-1][j];
      }
    }
    if (firstcol == 1) {
      for(j=firstrow;j<=lastrow;j++) {
        fields->psi[psiindex][j][0] = fields->psim[psiindex][j][0];
      }
    }
    if ((firstcol+numcols) == jm-1) {
      for(j=firstrow;j<=lastrow;j++) {
        fields->psi[psiindex][j][jm-1] = fields->psim[psiindex][j][jm-1];
      }
    }
    for(i=firstrow;i<=lastrow;i++) {
      for(iindex=firstcol;iindex<=lastcol;iindex++) {
        fields->psi[psiindex][i][iindex] = fields->psim[psiindex][i][iindex];
      }
    }
  }

  /* put the laplacian of the psim array
     into the work7 array; first part of a three-laplacian
     calculation to compute the friction terms */
  for(psiindex=0;psiindex<=1;psiindex++) {
    if (procid == MASTER) {
      wrk5->work7[psiindex][0][0] = 0;
    }
    if (procid == nprocs-xprocs) {
      wrk5->work7[psiindex][im-1][0] = 0;
    }
    if (procid == xprocs-1) {
      wrk5->work7[psiindex][0][jm-1] = 0;
    }
    if (procid == nprocs-1) {
      wrk5->work7[psiindex][im-1][jm-1] = 0;
    }
    laplacalc(fields->psim[psiindex],wrk5->work7[psiindex],firstrow,lastrow,firstcol,lastcol,numrows,numcols);
  }

  /* to the values of the work1{1,2} arrays obtained from the
     laplacians of psi{1,2} in the previous phase, add to the
     elements of every column the corresponding value in the
     one-dimensional f array */
  for(psiindex=0;psiindex<=1;psiindex++) {
    if (procid == MASTER) {
      wrk3->work1[psiindex][0][0] = wrk3->work1[psiindex][0][0] + wrk2->f[0];
    }
    if (procid == nprocs-xprocs) {
      wrk3->work1[psiindex][im-1][0] = wrk3->work1[psiindex][im-1][0] + wrk2->f[0];
    }
    if (procid == xprocs-1) {
      wrk3->work1[psiindex][0][jm-1] = wrk3->work1[psiindex][0][jm-1] + wrk2->f[jm-1];
    }
    if (procid == nprocs-1) {
      wrk3->work1[psiindex][im-1][jm-1] = wrk3->work1[psiindex][im-1][jm-1] + wrk2->f[jm-1];
    }
    if (firstrow == 1) {
      for(j=firstcol;j<=lastcol;j++) {
        wrk3->work1[psiindex][0][j] = wrk3->work1[psiindex][0][j] + wrk2->f[j];
      }
    }
    if ((firstrow+numrows) == im-1) {
      for(j=firstcol;j<=lastcol;j++) {
        wrk3->work1[psiindex][im-1][j] = wrk3->work1[psiindex][im-1][j] + wrk2->f[j];
      }
    }
    /* NOTE(review): the corners and the interior loop index f by column,
       but the two boundary-column loops below index it by the row
       counter j -- confirm whether f[0] / f[jm-1] was intended.
       Behaviour is left unchanged here. */
    if (firstcol == 1) {
      for(j=firstrow;j<=lastrow;j++) {
        wrk3->work1[psiindex][j][0] = wrk3->work1[psiindex][j][0] + wrk2->f[j];
      }
    }
    if ((firstcol+numcols) == jm-1) {
      for(j=firstrow;j<=lastrow;j++) {
        wrk3->work1[psiindex][j][jm-1] = wrk3->work1[psiindex][j][jm-1] + wrk2->f[j];
      }
    }
    for(i=firstrow;i<=lastrow;i++) {
      for(iindex=firstcol;iindex<=lastcol;iindex++) {
        wrk3->work1[psiindex][i][iindex] = wrk3->work1[psiindex][i][iindex] +
                                           wrk2->f[iindex];
      }
    }
  }
#if defined(MULTIPLE_BARRIERS)
  BARRIER(bars->sl_phase_2,nprocs)
#else
  BARRIER(bars->barrier,nprocs)
#endif

  /* *******************************************************
     t h i r d   p h a s e
     *******************************************************
     put the jacobian of the work1{1,2} and psi{1,3} arrays
     (the latter currently in temparray) in the work5{1,2} arrays */
  for(psiindex=0;psiindex<=1;psiindex++) {
    jacobcalc(wrk3->work1[psiindex],wrk5->temparray[psiindex],
              wrk4->work5[psiindex],procid,firstrow,lastrow,firstcol,lastcol,numrows,numcols);
  }

  /* set values of psim{1,3} to temparray{1,3} */
  for(psiindex=0;psiindex<=1;psiindex++) {
    if (procid == MASTER) {
      fields->psim[psiindex][0][0] = wrk5->temparray[psiindex][0][0];
    }
    if (procid == nprocs-xprocs) {
      fields->psim[psiindex][im-1][0] = wrk5->temparray[psiindex][im-1][0];
    }
    if (procid == xprocs-1) {
      fields->psim[psiindex][0][jm-1] = wrk5->temparray[psiindex][0][jm-1];
    }
    if (procid == nprocs-1) {
      fields->psim[psiindex][im-1][jm-1] = wrk5->temparray[psiindex][im-1][jm-1];
    }
    if (firstrow == 1) {
      for(j=firstcol;j<=lastcol;j++) {
        fields->psim[psiindex][0][j] = wrk5->temparray[psiindex][0][j];
      }
    }
    if ((firstrow+numrows) == im-1) {
      for(j=firstcol;j<=lastcol;j++) {
        fields->psim[psiindex][im-1][j] = wrk5->temparray[psiindex][im-1][j];
      }
    }
    if (firstcol == 1) {
      for(j=firstrow;j<=lastrow;j++) {
        fields->psim[psiindex][j][0] = wrk5->temparray[psiindex][j][0];
      }
    }
    if ((firstcol+numcols) == jm-1) {
      for(j=firstrow;j<=lastrow;j++) {
        fields->psim[psiindex][j][jm-1] = wrk5->temparray[psiindex][j][jm-1];
      }
    }
    for(i=firstrow;i<=lastrow;i++) {
      for(iindex=firstcol;iindex<=lastcol;iindex++) {
        fields->psim[psiindex][i][iindex] = wrk5->temparray[psiindex][i][iindex];
      }
    }
  }

  /* put the laplacian of the work7{1,2} arrays in the work4{1,2}
     arrays; second step in the three-laplacian friction calculation */
  for(psiindex=0;psiindex<=1;psiindex++) {
    laplacalc(wrk5->work7[psiindex],
              wrk4->work4[psiindex],
              firstrow,lastrow,firstcol,lastcol,numrows,numcols);
  }
#if defined(MULTIPLE_BARRIERS)
  BARRIER(bars->sl_phase_3,nprocs)
#else
  BARRIER(bars->barrier,nprocs)
#endif

  /* *******************************************************
     f o u r t h   p h a s e
     *******************************************************
     put the jacobian of the work2 and work3 arrays in the work6
     array */
  jacobcalc(wrk3->work2,wrk2->work3,wrk6->work6,procid,firstrow,
            lastrow,firstcol,lastcol,numrows,numcols);

  /* put the laplacian of the work4{1,2} arrays in the work7{1,2}
     arrays; third step in the three-laplacian friction calculation */
  for(psiindex=0;psiindex<=1;psiindex++) {
    laplacalc(wrk4->work4[psiindex],
              wrk5->work7[psiindex],
              firstrow,lastrow,firstcol,lastcol,numrows,numcols);
  }
#if defined(MULTIPLE_BARRIERS)
  BARRIER(bars->sl_phase_4,nprocs)
#else
  BARRIER(bars->barrier,nprocs)
#endif

  /* *******************************************************
     f i f t h   p h a s e
     *******************************************************
     use the values of the work5, work6 and work7 arrays
     computed in the previous phases to compute the
     ga and gb arrays:
       ga = work5{1} - work5{2} + eig2*work6 + tauz/h1
            + lf*work7{1} - lf*work7{2}
       gb = hh1*work5{1} + hh3*work5{2} + tauz/h
            + lf*hh1*work7{1} + lf*hh3*work7{2}            */
  hinv = 1.0/h;
  h1inv = 1.0/h1;
  if (procid == MASTER) {
    wrk1->ga[0][0] = wrk4->work5[0][0][0]-wrk4->work5[1][0][0]+eig2*wrk6->work6[0][0]+h1inv*
                     frcng->tauz[0][0]+lf*wrk5->work7[0][0][0]-lf*wrk5->work7[1][0][0];
    wrk1->gb[0][0] = hh1*wrk4->work5[0][0][0]+hh3*wrk4->work5[1][0][0]+hinv*frcng->tauz[0][0]+
                     lf*hh1*wrk5->work7[0][0][0]+lf*hh3*wrk5->work7[1][0][0];
  }
  if (procid == nprocs-xprocs) {
    wrk1->ga[im-1][0] = wrk4->work5[0][im-1][0]-wrk4->work5[1][im-1][0]+eig2*wrk6->work6[im-1][0]+h1inv*
                        frcng->tauz[im-1][0]+lf*wrk5->work7[0][im-1][0]-lf*wrk5->work7[1][im-1][0];
    wrk1->gb[im-1][0] = hh1*wrk4->work5[0][im-1][0]+hh3*wrk4->work5[1][im-1][0]+hinv*frcng->tauz[im-1][0]+
                        lf*hh1*wrk5->work7[0][im-1][0]+lf*hh3*wrk5->work7[1][im-1][0];
  }
  if (procid == xprocs-1) {
    wrk1->ga[0][jm-1] = wrk4->work5[0][0][jm-1]-wrk4->work5[1][0][jm-1]+eig2*wrk6->work6[0][jm-1]+h1inv*
                        frcng->tauz[0][jm-1]+lf*wrk5->work7[0][0][jm-1]-lf*wrk5->work7[1][0][jm-1];
    wrk1->gb[0][jm-1] = hh1*wrk4->work5[0][0][jm-1]+hh3*wrk4->work5[1][0][jm-1]+hinv*frcng->tauz[0][jm-1]+
                        lf*hh1*wrk5->work7[0][0][jm-1]+lf*hh3*wrk5->work7[1][0][jm-1];
  }
  if (procid == nprocs-1) {
    wrk1->ga[im-1][jm-1] = wrk4->work5[0][im-1][jm-1]-wrk4->work5[1][im-1][jm-1]+eig2*wrk6->work6[im-1][jm-1]+
                           h1inv*frcng->tauz[im-1][jm-1]+lf*wrk5->work7[0][im-1][jm-1]-lf*wrk5->work7[1][im-1][jm-1];
    wrk1->gb[im-1][jm-1] = hh1*wrk4->work5[0][im-1][jm-1]+hh3*wrk4->work5[1][im-1][jm-1]+hinv*
                           frcng->tauz[im-1][jm-1]+lf*hh1*wrk5->work7[0][im-1][jm-1]+lf*hh3*wrk5->work7[1][im-1][jm-1];
  }
  if (firstrow == 1) {
    for(j=firstcol;j<=lastcol;j++) {
      /* the subtracted friction term reads work7 layer 1, as in every
         other ga expression of this phase (previously layer 0 was
         subtracted from itself, cancelling the term on this row) */
      wrk1->ga[0][j] = wrk4->work5[0][0][j]-wrk4->work5[1][0][j]+eig2*
                       wrk6->work6[0][j]+h1inv*frcng->tauz[0][j]+lf*wrk5->work7[0][0][j]-lf*wrk5->work7[1][0][j];
      wrk1->gb[0][j] = hh1*wrk4->work5[0][0][j]+hh3*wrk4->work5[1][0][j]+hinv*
                       frcng->tauz[0][j]+lf*hh1*wrk5->work7[0][0][j]+lf*hh3*wrk5->work7[1][0][j];
    }
  }
  if ((firstrow+numrows) == im-1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk1->ga[im-1][j] = wrk4->work5[0][im-1][j]-wrk4->work5[1][im-1][j]+eig2*
                          wrk6->work6[im-1][j]+h1inv*frcng->tauz[im-1][j]+
                          lf*wrk5->work7[0][im-1][j]-lf*wrk5->work7[1][im-1][j];
      wrk1->gb[im-1][j] = hh1*wrk4->work5[0][im-1][j]+hh3*wrk4->work5[1][im-1][j]+hinv*
                          frcng->tauz[im-1][j]+lf*hh1*wrk5->work7[0][im-1][j]+
                          lf*hh3*wrk5->work7[1][im-1][j];
    }
  }
  if (firstcol == 1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk1->ga[j][0] = wrk4->work5[0][j][0]-wrk4->work5[1][j][0]+eig2*
                       wrk6->work6[j][0]+h1inv*frcng->tauz[j][0]+lf*wrk5->work7[0][j][0]-lf*wrk5->work7[1][j][0];
      wrk1->gb[j][0] = hh1*wrk4->work5[0][j][0]+hh3*wrk4->work5[1][j][0]+hinv*
                       frcng->tauz[j][0]+lf*hh1*wrk5->work7[0][j][0]+lf*hh3*wrk5->work7[1][j][0];
    }
  }
  if ((firstcol+numcols) == jm-1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk1->ga[j][jm-1] = wrk4->work5[0][j][jm-1]-wrk4->work5[1][j][jm-1]+eig2*
                          wrk6->work6[j][jm-1]+h1inv*frcng->tauz[j][jm-1]+
                          lf*wrk5->work7[0][j][jm-1]-lf*wrk5->work7[1][j][jm-1];
      wrk1->gb[j][jm-1] = hh1*wrk4->work5[0][j][jm-1]+hh3*wrk4->work5[1][j][jm-1]+hinv*
                          frcng->tauz[j][jm-1]+lf*hh1*wrk5->work7[0][j][jm-1]+
                          lf*hh3*wrk5->work7[1][j][jm-1];
    }
  }
  for(i=firstrow;i<=lastrow;i++) {
    for(iindex=firstcol;iindex<=lastcol;iindex++) {
      wrk1->ga[i][iindex] = wrk4->work5[0][i][iindex]-wrk4->work5[1][i][iindex]+eig2*
                            wrk6->work6[i][iindex]+h1inv*frcng->tauz[i][iindex]+
                            lf*wrk5->work7[0][i][iindex]-lf*wrk5->work7[1][i][iindex];
      wrk1->gb[i][iindex] = hh1*wrk4->work5[0][i][iindex]+hh3*wrk4->work5[1][i][iindex]+hinv*
                            frcng->tauz[i][iindex]+lf*hh1*wrk5->work7[0][i][iindex]+
                            lf*hh3*wrk5->work7[1][i][iindex];
    }
  }
#if defined(MULTIPLE_BARRIERS)
  BARRIER(bars->sl_phase_5,nprocs)
#else
  BARRIER(bars->barrier,nprocs)
#endif

  /* *******************************************************
     s i x t h   p h a s e
     *******************************************************
     run multig() with ga as input and store the result back in ga.

     (ist..ien, jst..jen) is this process's range at level numlev-1 as
     recorded in gp[procid]; (istart..iend, jstart..jend) is the same
     range widened by one to take in row/column 0 or im-1/jm-1 when the
     range touches them. */
  istart = gp[procid].rel_start_y[numlev-1];
  iend = istart + gp[procid].rel_num_y[numlev-1] - 1;
  jstart = gp[procid].rel_start_x[numlev-1];
  jend = jstart + gp[procid].rel_num_x[numlev-1] - 1;
  ist = istart;
  ien = iend;
  jst = jstart;
  jen = jend;
  if (istart == 1) {
    istart = 0;
  }
  if (jstart == 1) {
    jstart = 0;
  }
  if (iend == im-2) {
    iend = im-1;
  }
  if (jend == jm-2) {
    jend = jm-1;
  }

  /* rhs_multi = ga scaled by ressqr, over the widened range */
  for(i=istart;i<=iend;i++) {
    for(j=jstart;j<=jend;j++) {
      multi->rhs_multi[numlev-1][i][j] = wrk1->ga[i][j] * ressqr;
    }
  }

  /* q_multi takes ga itself on the boundary rows and columns ... */
  if (istart == 0) {
    for(j=jstart;j<=jend;j++) {
      multi->q_multi[numlev-1][0][j] = wrk1->ga[0][j];
    }
  }
  if (iend == im-1) {
    for(j=jstart;j<=jend;j++) {
      multi->q_multi[numlev-1][im-1][j] = wrk1->ga[im-1][j];
    }
  }
  if (jstart == 0) {
    for(i=istart;i<=iend;i++) {
      multi->q_multi[numlev-1][i][0] = wrk1->ga[i][0];
    }
  }
  if (jend == jm-1) {
    for(i=istart;i<=iend;i++) {
      multi->q_multi[numlev-1][i][jm-1] = wrk1->ga[i][jm-1];
    }
  }

  /* ... and the saved oldga values on the un-widened range */
  for(i=ist;i<=ien;i++) {
    for(j=jst;j<=jen;j++) {
      multi->q_multi[numlev-1][i][j] = guess->oldga[i][j];
    }
  }

  /* time the multig() call on MASTER, or on every process if do_stats */
  if ((procid == MASTER) || (do_stats)) {
    CLOCK(multi_start);
  }
  multig(procid);
  if ((procid == MASTER) || (do_stats)) {
    CLOCK(multi_end);
    gp[procid].multi_time += (multi_end - multi_start);
  }

  /* reset the shared running sum that is accumulated in the seventh phase */
  if (procid == MASTER) {
    global->psiai=0.0;
  }

  /* copy the solution for use as initial guess in next time-step */
  for(i=istart;i<=iend;i++) {
    for(j=jstart;j<=jend;j++) {
      wrk1->ga[i][j] = multi->q_multi[numlev-1][i][j];
      guess->oldga[i][j] = multi->q_multi[numlev-1][i][j];
    }
  }
#if defined(MULTIPLE_BARRIERS)
  BARRIER(bars->sl_phase_6,nprocs)
#else
  BARRIER(bars->barrier,nprocs)
#endif

  /* *******************************************************
     s e v e n t h   p h a s e
     *******************************************************
     every process computes the running sum for its assigned portion
     in a private variable psiaipriv; corners are weighted 0.25,
     boundary rows/columns 0.5 and interior points 1 */
  psiaipriv=0.0;
  if (procid == MASTER) {
    psiaipriv = psiaipriv + 0.25*(wrk1->ga[0][0]);
  }
  if (procid == xprocs - 1) {
    psiaipriv = psiaipriv + 0.25*(wrk1->ga[0][jm-1]);
  }
  if (procid == nprocs-xprocs) {
    psiaipriv=psiaipriv+0.25*(wrk1->ga[im-1][0]);
  }
  if (procid == nprocs-1) {
    psiaipriv=psiaipriv+0.25*(wrk1->ga[im-1][jm-1]);
  }
  if (firstrow == 1) {
    for(j=firstcol;j<=lastcol;j++) {
      psiaipriv = psiaipriv + 0.5*wrk1->ga[0][j];
    }
  }
  if ((firstrow+numrows) == im-1) {
    for(j=firstcol;j<=lastcol;j++) {
      psiaipriv = psiaipriv + 0.5*wrk1->ga[im-1][j];
    }
  }
  if (firstcol == 1) {
    for(j=firstrow;j<=lastrow;j++) {
      psiaipriv = psiaipriv + 0.5*wrk1->ga[j][0];
    }
  }
  if ((firstcol+numcols) == jm-1) {
    for(j=firstrow;j<=lastrow;j++) {
      psiaipriv = psiaipriv + 0.5*wrk1->ga[j][jm-1];
    }
  }
  /* interior points; note the column-outer / row-inner loop order */
  for(iindex=firstcol;iindex<=lastcol;iindex++) {
    for(i=firstrow;i<=lastrow;i++) {
      psiaipriv = psiaipriv + wrk1->ga[i][iindex];
    }
  }

  /* after computing its private sum, every process adds that to the
     shared running sum psiai */
  LOCK(locks->psibilock)
  global->psiai = global->psiai + psiaipriv;
  UNLOCK(locks->psibilock)
#if defined(MULTIPLE_BARRIERS)
  BARRIER(bars->sl_phase_7,nprocs)
#else
  BARRIER(bars->barrier,nprocs)
#endif

  /* *******************************************************
     e i g h t h   p h a s e
     *******************************************************
     augment ga(i,j) with [-psiai/psibi]*psib(i,j);
     the factor f4 is held in a private (local) variable */
  f4 = (-global->psiai)/(global->psibi);
  if (procid == MASTER) {
    wrk1->ga[0][0] = wrk1->ga[0][0]+f4*wrk1->psib[0][0];
  }
  if (procid == nprocs-xprocs) {
    wrk1->ga[im-1][0] = wrk1->ga[im-1][0]+f4*wrk1->psib[im-1][0];
  }
  if (procid == xprocs-1) {
    wrk1->ga[0][jm-1] = wrk1->ga[0][jm-1]+f4*wrk1->psib[0][jm-1];
  }
  if (procid == nprocs-1) {
    wrk1->ga[im-1][jm-1] = wrk1->ga[im-1][jm-1]+f4*wrk1->psib[im-1][jm-1];
  }
  if (firstrow == 1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk1->ga[0][j] = wrk1->ga[0][j]+f4*wrk1->psib[0][j];
    }
  }
  if ((firstrow+numrows) == im-1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk1->ga[im-1][j] = wrk1->ga[im-1][j]+f4*wrk1->psib[im-1][j];
    }
  }
  if (firstcol == 1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk1->ga[j][0] = wrk1->ga[j][0]+f4*wrk1->psib[j][0];
    }
  }
  if ((firstcol+numcols) == jm-1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk1->ga[j][jm-1] = wrk1->ga[j][jm-1]+f4*wrk1->psib[j][jm-1];
    }
  }
  for(i=firstrow;i<=lastrow;i++) {
    for(iindex=firstcol;iindex<=lastcol;iindex++) {
      wrk1->ga[i][iindex] = wrk1->ga[i][iindex]+f4*wrk1->psib[i][iindex];
    }
  }
#if defined(MULTIPLE_BARRIERS)
  BARRIER(bars->sl_phase_8,nprocs)
#else
  BARRIER(bars->barrier,nprocs)
#endif

  /* run multig() again, this time with gb as input; same set-up as in
     the sixth phase, reusing the index ranges computed there */
  for(i=istart;i<=iend;i++) {
    for(j=jstart;j<=jend;j++) {
      multi->rhs_multi[numlev-1][i][j] = wrk1->gb[i][j] * ressqr;
    }
  }
  if (istart == 0) {
    for(j=jstart;j<=jend;j++) {
      multi->q_multi[numlev-1][0][j] = wrk1->gb[0][j];
    }
  }
  if (iend == im-1) {
    for(j=jstart;j<=jend;j++) {
      multi->q_multi[numlev-1][im-1][j] = wrk1->gb[im-1][j];
    }
  }
  if (jstart == 0) {
    for(i=istart;i<=iend;i++) {
      multi->q_multi[numlev-1][i][0] = wrk1->gb[i][0];
    }
  }
  if (jend == jm-1) {
    for(i=istart;i<=iend;i++) {
      multi->q_multi[numlev-1][i][jm-1] = wrk1->gb[i][jm-1];
    }
  }
  for(i=ist;i<=ien;i++) {
    for(j=jst;j<=jen;j++) {
      multi->q_multi[numlev-1][i][j] = guess->oldgb[i][j];
    }
  }
  if ((procid == MASTER) || (do_stats)) {
    CLOCK(multi_start);
  }
  multig(procid);
  if ((procid == MASTER) || (do_stats)) {
    CLOCK(multi_end);
    gp[procid].multi_time += (multi_end - multi_start);
  }

  /* store the result in gb and keep a copy in oldgb for the next call */
  for(i=istart;i<=iend;i++) {
    for(j=jstart;j<=jend;j++) {
      wrk1->gb[i][j] = multi->q_multi[numlev-1][i][j];
      guess->oldgb[i][j] = multi->q_multi[numlev-1][i][j];
    }
  }
  /* the sl_phase_8 barrier is used a second time here */
#if defined(MULTIPLE_BARRIERS)
  BARRIER(bars->sl_phase_8,nprocs)
#else
  BARRIER(bars->barrier,nprocs)
#endif

  /* *******************************************************
     n i n t h   p h a s e
     *******************************************************
     put appropriate linear combinations of ga and gb in work2 and work3;
     note that here (as in most cases) the constant multipliers are made
     private variables; the specific order in which things are done is
     chosen in order to hopefully reuse things brought into the cache
     note that here again we choose to have all processes share the work
     on both matrices despite the fact that the work done per element
     is the same, because the operand matrices are the same in both cases */
  if (procid == MASTER) {
    wrk3->work2[0][0] = wrk1->gb[0][0]-hh1*wrk1->ga[0][0];
    wrk2->work3[0][0] = wrk1->gb[0][0]+hh3*wrk1->ga[0][0];
  }
  if (procid == nprocs-xprocs) {
    wrk3->work2[im-1][0] = wrk1->gb[im-1][0]-hh1*wrk1->ga[im-1][0];
    wrk2->work3[im-1][0] = wrk1->gb[im-1][0]+hh3*wrk1->ga[im-1][0];
  }
  if (procid == xprocs-1) {
    wrk3->work2[0][jm-1] = wrk1->gb[0][jm-1]-hh1*wrk1->ga[0][jm-1];
    wrk2->work3[0][jm-1] = wrk1->gb[0][jm-1]+hh3*wrk1->ga[0][jm-1];
  }
  if (procid == nprocs-1) {
    wrk3->work2[im-1][jm-1] = wrk1->gb[im-1][jm-1]-hh1*wrk1->ga[im-1][jm-1];
    wrk2->work3[im-1][jm-1] = wrk1->gb[im-1][jm-1]+hh3*wrk1->ga[im-1][jm-1];
  }
  if (firstrow == 1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk2->work3[0][j] = wrk1->gb[0][j]+hh3*wrk1->ga[0][j];
      wrk3->work2[0][j] = wrk1->gb[0][j]-hh1*wrk1->ga[0][j];
    }
  }
  if ((firstrow+numrows) == im-1) {
    for(j=firstcol;j<=lastcol;j++) {
      wrk2->work3[im-1][j] = wrk1->gb[im-1][j]+hh3*wrk1->ga[im-1][j];
      wrk3->work2[im-1][j] = wrk1->gb[im-1][j]-hh1*wrk1->ga[im-1][j];
    }
  }
  if (firstcol == 1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk2->work3[j][0] = wrk1->gb[j][0]+hh3*wrk1->ga[j][0];
      wrk3->work2[j][0] = wrk1->gb[j][0]-hh1*wrk1->ga[j][0];
    }
  }
  if ((firstcol+numcols) == jm-1) {
    for(j=firstrow;j<=lastrow;j++) {
      wrk2->work3[j][jm-1] = wrk1->gb[j][jm-1]+hh3*wrk1->ga[j][jm-1];
      wrk3->work2[j][jm-1] = wrk1->gb[j][jm-1]-hh1*wrk1->ga[j][jm-1];
    }
  }
  for(i=firstrow;i<=lastrow;i++) {
    for(iindex=firstcol;iindex<=lastcol;iindex++) {
      wrk2->work3[i][iindex] = wrk1->gb[i][iindex]+hh3*wrk1->ga[i][iindex];
      wrk3->work2[i][iindex] = wrk1->gb[i][iindex]-hh1*wrk1->ga[i][iindex];
    }
  }
#if defined(MULTIPLE_BARRIERS)
  BARRIER(bars->sl_phase_9,nprocs)
#else
  BARRIER(bars->barrier,nprocs)
#endif

  /* *******************************************************
     t e n t h   p h a s e
     *******************************************************/
  timst = 2*dtau;

  /* update the psi{1,3} matrices: psi1 += 2*dtau*work3 ... */
  if (procid == MASTER) {
    fields->psi[0][0][0] = fields->psi[0][0][0] + timst*wrk2->work3[0][0];
  }
  if (procid == nprocs-xprocs) {
    fields->psi[0][im-1][0] = fields->psi[0][im-1][0] + timst*wrk2->work3[im-1][0];
  }
  if (procid == xprocs-1) {
    fields->psi[0][0][jm-1] = fields->psi[0][0][jm-1] + timst*wrk2->work3[0][jm-1];
  }
  if (procid == nprocs-1) {
    fields->psi[0][im-1][jm-1] = fields->psi[0][im-1][jm-1] + timst*wrk2->work3[im-1][jm-1];
  }
  if (firstrow == 1) {
    for(j=firstcol;j<=lastcol;j++) {
      fields->psi[0][0][j] = fields->psi[0][0][j] + timst*wrk2->work3[0][j];
    }
  }
  if ((firstrow+numrows) == im-1) {
    for(j=firstcol;j<=lastcol;j++) {
      fields->psi[0][im-1][j] = fields->psi[0][im-1][j] + timst*wrk2->work3[im-1][j];
    }
  }
  if (firstcol == 1) {
    for(j=firstrow;j<=lastrow;j++) {
      fields->psi[0][j][0] = fields->psi[0][j][0] + timst*wrk2->work3[j][0];
    }
  }
  if ((firstcol+numcols) == jm-1) {
    for(j=firstrow;j<=lastrow;j++) {
      fields->psi[0][j][jm-1] = fields->psi[0][j][jm-1] + timst*wrk2->work3[j][jm-1];
    }
  }
  for(i=firstrow;i<=lastrow;i++) {
    for(iindex=firstcol;iindex<=lastcol;iindex++) {
      fields->psi[0][i][iindex] = fields->psi[0][i][iindex] + timst*wrk2->work3[i][iindex];
    }
  }

  /* ... and psi3 += 2*dtau*work2 */
  if (procid == MASTER) {
    fields->psi[1][0][0] = fields->psi[1][0][0] + timst*wrk3->work2[0][0];
  }
  if (procid == nprocs-xprocs) {
    fields->psi[1][im-1][0] = fields->psi[1][im-1][0] + timst*wrk3->work2[im-1][0];
  }
  if (procid == xprocs-1) {
    fields->psi[1][0][jm-1] = fields->psi[1][0][jm-1] + timst*wrk3->work2[0][jm-1];
  }
  if (procid == nprocs-1) {
    fields->psi[1][im-1][jm-1] = fields->psi[1][im-1][jm-1] + timst*wrk3->work2[im-1][jm-1];
  }
  if (firstrow == 1) {
    for(j=firstcol;j<=lastcol;j++) {
      fields->psi[1][0][j] = fields->psi[1][0][j] + timst*wrk3->work2[0][j];
    }
  }
  if ((firstrow+numrows) == im-1) {
    for(j=firstcol;j<=lastcol;j++) {
      fields->psi[1][im-1][j] = fields->psi[1][im-1][j] + timst*wrk3->work2[im-1][j];
    }
  }
  if (firstcol == 1) {
    for(j=firstrow;j<=lastrow;j++) {
      fields->psi[1][j][0] = fields->psi[1][j][0] + timst*wrk3->work2[j][0];
    }
  }
  if ((firstcol+numcols) == jm-1) {
    for(j=firstrow;j<=lastrow;j++) {
      fields->psi[1][j][jm-1] = fields->psi[1][j][jm-1] + timst*wrk3->work2[j][jm-1];
    }
  }
  for(i=firstrow;i<=lastrow;i++) {
    for(iindex=firstcol;iindex<=lastcol;iindex++) {
      fields->psi[1][i][iindex] = fields->psi[1][i][iindex] + timst*wrk3->work2[i][iindex];
    }
  }
#if defined(MULTIPLE_BARRIERS)
  BARRIER(bars->sl_phase_10,nprocs)
#else
  BARRIER(bars->barrier,nprocs)
#endif
}