/*************************************************************************/
/*                                                                       */
/*  Copyright (c) 1994 Stanford University                               */
/*                                                                       */
/*  All rights reserved.                                                 */
/*                                                                       */
/*  Permission is given to use, copy, and modify this software for any   */
/*  non-commercial purpose as long as this copyright notice is not       */
/*  removed. All other uses, including redistribution in whole or in     */
/*  part, are forbidden without prior written permission.                */
/*                                                                       */
/*  This software is provided with absolutely no warranty and no         */
/*  support.                                                             */
/*                                                                       */
/*************************************************************************/

/*    ****************
      subroutine slave
      ****************  */
|
|
|
|
EXTERN_ENV
|
|
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
#include <time.h>
|
|
#include <stdlib.h>
|
|
#include "decs.h"
|
|
|
|
void slave()
|
|
{
|
|
long i;
|
|
long j;
|
|
long nstep;
|
|
long iindex;
|
|
long iday;
|
|
double ysca1;
|
|
double y;
|
|
double factor;
|
|
double sintemp;
|
|
double curlt;
|
|
double ressqr;
|
|
long istart;
|
|
long iend;
|
|
long jstart;
|
|
long jend;
|
|
long ist;
|
|
long ien;
|
|
long jst;
|
|
long jen;
|
|
double fac;
|
|
long dayflag=0;
|
|
long dhourflag=0;
|
|
long endflag=0;
|
|
long firstrow;
|
|
long lastrow;
|
|
long numrows;
|
|
long firstcol;
|
|
long lastcol;
|
|
long numcols;
|
|
long psiindex;
|
|
double psibipriv;
|
|
double ttime;
|
|
double dhour;
|
|
double day;
|
|
long procid;
|
|
long psinum;
|
|
long j_off = 0;
|
|
unsigned long t1;
|
|
double **t2a;
|
|
double **t2b;
|
|
double *t1a;
|
|
double *t1b;
|
|
double *t1c;
|
|
double *t1d;
|
|
|
|
ressqr = lev_res[numlev-1] * lev_res[numlev-1];
|
|
|
|
LOCK(locks->idlock)
|
|
procid = global->id;
|
|
global->id = global->id+1;
|
|
UNLOCK(locks->idlock)
|
|
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->sl_prini,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
|
|
processors to avoid migration. */
|
|
|
|
/* POSSIBLE ENHANCEMENT: Here is where one might distribute
|
|
data structures across physically distributed memories as
|
|
desired.
|
|
|
|
One way to do this is as follows. The function allocate(START,SIZE,I)
|
|
is assumed to place all addresses x such that
|
|
(START <= x < START+SIZE) on node I.
|
|
|
|
long d_size;
|
|
unsigned long g_size;
|
|
unsigned long mg_size;
|
|
|
|
if (procid == MASTER) {
|
|
g_size = ((jmx[numlev-1]-2)/xprocs+2)*((imx[numlev-1]-2)/yprocs+2)*siz
|
|
eof(double) +
|
|
((imx[numlev-1]-2)/yprocs+2)*sizeof(double *);
|
|
|
|
mg_size = numlev*sizeof(double **);
|
|
for (i=0;i<numlev;i++) {
|
|
mg_size+=((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
|
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
|
}
|
|
for (i= 0;i<nprocs;i++) {
|
|
d_size = 2*sizeof(double **);
|
|
allocate((unsigned long) psi[i],d_size,i);
|
|
allocate((unsigned long) psim[i],d_size,i);
|
|
allocate((unsigned long) work1[i],d_size,i);
|
|
allocate((unsigned long) work4[i],d_size,i);
|
|
allocate((unsigned long) work5[i],d_size,i);
|
|
allocate((unsigned long) work7[i],d_size,i);
|
|
allocate((unsigned long) temparray[i],d_size,i);
|
|
allocate((unsigned long) psi[i][0],g_size,i);
|
|
allocate((unsigned long) psi[i][1],g_size,i);
|
|
allocate((unsigned long) psim[i][0],g_size,i);
|
|
allocate((unsigned long) psim[i][1],g_size,i);
|
|
allocate((unsigned long) psium[i],g_size,i);
|
|
allocate((unsigned long) psilm[i],g_size,i);
|
|
allocate((unsigned long) psib[i],g_size,i);
|
|
allocate((unsigned long) ga[i],g_size,i);
|
|
allocate((unsigned long) gb[i],g_size,i);
|
|
allocate((unsigned long) work1[i][0],g_size,i);
|
|
allocate((unsigned long) work1[i][1],g_size,i);
|
|
allocate((unsigned long) work2[i],g_size,i);
|
|
allocate((unsigned long) work3[i],g_size,i);
|
|
allocate((unsigned long) work4[i][0],g_size,i);
|
|
allocate((unsigned long) work4[i][1],g_size,i);
|
|
allocate((unsigned long) work5[i][0],g_size,i);
|
|
allocate((unsigned long) work5[i][1],g_size,i);
|
|
allocate((unsigned long) work6[i],g_size,i);
|
|
allocate((unsigned long) work7[i][0],g_size,i);
|
|
allocate((unsigned long) work7[i][1],g_size,i);
|
|
allocate((unsigned long) temparray[i][0],g_size,i);
|
|
allocate((unsigned long) temparray[i][1],g_size,i);
|
|
allocate((unsigned long) tauz[i],g_size,i);
|
|
allocate((unsigned long) oldga[i],g_size,i);
|
|
allocate((unsigned long) oldgb[i],g_size,i);
|
|
d_size = numlev * sizeof(long);
|
|
allocate((unsigned long) gp[i].rel_num_x,d_size,i);
|
|
allocate((unsigned long) gp[i].rel_num_y,d_size,i);
|
|
allocate((unsigned long) gp[i].eist,d_size,i);
|
|
allocate((unsigned long) gp[i].ejst,d_size,i);
|
|
allocate((unsigned long) gp[i].oist,d_size,i);
|
|
allocate((unsigned long) gp[i].ojst,d_size,i);
|
|
allocate((unsigned long) gp[i].rlist,d_size,i);
|
|
allocate((unsigned long) gp[i].rljst,d_size,i);
|
|
allocate((unsigned long) gp[i].rlien,d_size,i);
|
|
allocate((unsigned long) gp[i].rljen,d_size,i);
|
|
|
|
allocate((unsigned long) q_multi[i],mg_size,i);
|
|
allocate((unsigned long) rhs_multi[i],mg_size,i);
|
|
allocate((unsigned long) &(gp[i]),sizeof(struct Global_Private),i);
|
|
}
|
|
}
|
|
|
|
*/
|
|
|
|
t2a = (double **) oldga[procid];
|
|
t2b = (double **) oldgb[procid];
|
|
for (i=0;i<im;i++) {
|
|
t1a = (double *) t2a[i];
|
|
t1b = (double *) t2b[i];
|
|
for (j=0;j<jm;j++) {
|
|
t1a[j] = 0.0;
|
|
t1b[j] = 0.0;
|
|
}
|
|
}
|
|
|
|
firstcol = 1;
|
|
lastcol = firstcol + gp[procid].rel_num_x[numlev-1] - 1;
|
|
firstrow = 1;
|
|
lastrow = firstrow + gp[procid].rel_num_y[numlev-1] - 1;
|
|
numcols = gp[procid].rel_num_x[numlev-1];
|
|
numrows = gp[procid].rel_num_y[numlev-1];
|
|
j_off = gp[procid].colnum*numcols;
|
|
|
|
if (procid > nprocs/2) {
|
|
psinum = 2;
|
|
} else {
|
|
psinum = 1;
|
|
}
|
|
|
|
/* every process gets its own copy of the timing variables to avoid
|
|
contention at shared memory locations. here, these variables
|
|
are initialized. */
|
|
|
|
ttime = 0.0;
|
|
dhour = 0.0;
|
|
nstep = 0 ;
|
|
day = 0.0;
|
|
|
|
ysca1 = 0.5*ysca;
|
|
if (procid == MASTER) {
|
|
t1a = (double *) f;
|
|
for (iindex = 0;iindex<=jmx[numlev-1]-1;iindex++) {
|
|
y = ((double) iindex)*res;
|
|
t1a[iindex] = f0+beta*(y-ysca1);
|
|
}
|
|
}
|
|
|
|
t2a = (double **) psium[procid];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0]=0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0]=0.0;
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1]=0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1]=0.0;
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = 0.0;
|
|
}
|
|
}
|
|
|
|
for(i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = 0.0;
|
|
}
|
|
}
|
|
t2a = (double **) psilm[procid];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0]=0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0]=0.0;
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1]=0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1]=0.0;
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = 0.0;
|
|
}
|
|
}
|
|
for(i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = 0.0;
|
|
}
|
|
}
|
|
|
|
t2a = (double **) psib[procid];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0]=1.0;
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1]=1.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0]=1.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1]=1.0;
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 1.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 1.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = 1.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = 1.0;
|
|
}
|
|
}
|
|
for(i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = 0.0;
|
|
}
|
|
}
|
|
|
|
/* wait until all processes have completed the above initialization */
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->sl_prini,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
/* compute psib array (one-time computation) and integrate into psibi */
|
|
|
|
istart = 1;
|
|
iend = istart + gp[procid].rel_num_y[numlev-1] - 1;
|
|
jstart = 1;
|
|
jend = jstart + gp[procid].rel_num_x[numlev-1] - 1;
|
|
ist = istart;
|
|
ien = iend;
|
|
jst = jstart;
|
|
jen = jend;
|
|
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
istart = 0;
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
jstart = 0;
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
iend = im-1;
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
jend = jm-1;
|
|
}
|
|
|
|
t2a = (double **) rhs_multi[procid][numlev-1];
|
|
t2b = (double **) psib[procid];
|
|
for(i=istart;i<=iend;i++) {
|
|
t1a = (double *) t2a[i];
|
|
t1b = (double *) t2b[i];
|
|
for(j=jstart;j<=jend;j++) {
|
|
t1a[j] = t1b[j] * ressqr;
|
|
}
|
|
}
|
|
t2a = (double **) q_multi[procid][numlev-1];
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
t1b = (double *) t2b[0];
|
|
for(j=jstart;j<=jend;j++) {
|
|
t1a[j] = t1b[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
t1b = (double *) t2b[im-1];
|
|
for(j=jstart;j<=jend;j++) {
|
|
t1a[j] = t1b[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for(i=istart;i<=iend;i++) {
|
|
t2a[i][0] = t2b[i][0];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for(i=istart;i<=iend;i++) {
|
|
t2a[i][jm-1] = t2b[i][jm-1];
|
|
}
|
|
}
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->sl_psini,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
t2a = (double **) psib[procid];
|
|
j = gp[procid].neighbors[UP];
|
|
if (j != -1) {
|
|
t1a = (double *) t2a[0];
|
|
t1b = (double *) psib[j][im-2];
|
|
for (i=1;i<jm-1;i++) {
|
|
t1a[i] = t1b[i];
|
|
}
|
|
}
|
|
j = gp[procid].neighbors[DOWN];
|
|
if (j != -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
t1b = (double *) psib[j][1];
|
|
for (i=1;i<jm-1;i++) {
|
|
t1a[i] = t1b[i];
|
|
}
|
|
}
|
|
j = gp[procid].neighbors[LEFT];
|
|
if (j != -1) {
|
|
t2b = (double **) psib[j];
|
|
for (i=1;i<im-1;i++) {
|
|
t2a[i][0] = t2b[i][jm-2];
|
|
}
|
|
}
|
|
j = gp[procid].neighbors[RIGHT];
|
|
if (j != -1) {
|
|
t2b = (double **) psib[j];
|
|
for (i=1;i<im-1;i++) {
|
|
t2a[i][jm-1] = t2b[i][1];
|
|
}
|
|
}
|
|
|
|
t2a = (double **) q_multi[procid][numlev-1];
|
|
t2b = (double **) psib[procid];
|
|
fac = 1.0 / (4.0 - ressqr*eig2);
|
|
for(i=ist;i<=ien;i++) {
|
|
t1a = (double *) t2a[i];
|
|
t1b = (double *) t2b[i];
|
|
t1c = (double *) t2b[i-1];
|
|
t1d = (double *) t2b[i+1];
|
|
for(j=jst;j<=jen;j++) {
|
|
t1a[j] = fac * (t1d[j]+t1c[j]+t1b[j+1]+t1b[j-1] -
|
|
ressqr*t1b[j]);
|
|
}
|
|
}
|
|
|
|
multig(procid);
|
|
|
|
for(i=istart;i<=iend;i++) {
|
|
t1a = (double *) t2a[i];
|
|
t1b = (double *) t2b[i];
|
|
for(j=jstart;j<=jend;j++) {
|
|
t1b[j] = t1a[j];
|
|
}
|
|
}
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->sl_prini,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
/* update the local running sum psibipriv by summing all the resulting
|
|
values in that process's share of the psib matrix */
|
|
|
|
t2a = (double **) psib[procid];
|
|
psibipriv=0.0;
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
psibipriv = psibipriv + 0.25*(t2a[0][0]);
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
psibipriv = psibipriv + 0.25*(t2a[0][jm-1]);
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
psibipriv=psibipriv+0.25*(t2a[im-1][0]);
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
psibipriv=psibipriv+0.25*(t2a[im-1][jm-1]);
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
psibipriv = psibipriv + 0.5*t1a[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
psibipriv = psibipriv + 0.5*t1a[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
psibipriv = psibipriv + 0.5*t2a[j][0];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
psibipriv = psibipriv + 0.5*t2a[j][jm-1];
|
|
}
|
|
}
|
|
for(i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
psibipriv = psibipriv + t1a[iindex];
|
|
}
|
|
}
|
|
|
|
/* update the shared variable psibi by summing all the psibiprivs
|
|
of the individual processes into it. note that this combined
|
|
private and shared sum method avoids accessing the shared
|
|
variable psibi once for every element of the matrix. */
|
|
|
|
LOCK(locks->psibilock)
|
|
global->psibi = global->psibi + psibipriv;
|
|
UNLOCK(locks->psibilock)
|
|
|
|
/* initialize psim matrices
|
|
|
|
if there is more than one process, then split the processes
|
|
between the two psim matrices; otherwise, let the single process
|
|
work on one first and then the other */
|
|
|
|
for(psiindex=0;psiindex<=1;psiindex++) {
|
|
t2a = (double **) psim[procid][psiindex];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1] = 0.0;
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = 0.0;
|
|
}
|
|
}
|
|
for(i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = 0.0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* initialize psi matrices the same way */
|
|
|
|
for(psiindex=0;psiindex<=1;psiindex++) {
|
|
t2a = (double **) psi[procid][psiindex];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1] = 0.0;
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = 0.0;
|
|
}
|
|
}
|
|
for(i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = 0.0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* compute input curl of wind stress */
|
|
|
|
t2a = (double **) tauz[procid];
|
|
ysca1 = .5*ysca;
|
|
factor= -t0*pi/ysca1;
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
sintemp = pi*((double) jm-1+j_off)*res/ysca1;
|
|
sintemp = sin(sintemp);
|
|
t2a[0][jm-1] = factor*sintemp;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
sintemp = pi*((double) jm-1+j_off)*res/ysca1;
|
|
sintemp = sin(sintemp);
|
|
t2a[im-1][jm-1] = factor*sintemp;
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
sintemp = pi*((double) j+j_off)*res/ysca1;
|
|
sintemp = sin(sintemp);
|
|
curlt = factor*sintemp;
|
|
t1a[j] = curlt;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
sintemp = pi*((double) j+j_off)*res/ysca1;
|
|
sintemp = sin(sintemp);
|
|
curlt = factor*sintemp;
|
|
t1a[j] = curlt;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
sintemp = pi*((double) jm-1+j_off)*res/ysca1;
|
|
sintemp = sin(sintemp);
|
|
curlt = factor*sintemp;
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = curlt;
|
|
}
|
|
}
|
|
for(i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
sintemp = pi*((double) iindex+j_off)*res/ysca1;
|
|
sintemp = sin(sintemp);
|
|
curlt = factor*sintemp;
|
|
t1a[iindex] = curlt;
|
|
}
|
|
}
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->sl_onetime,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
|
|
/***************************************************************
|
|
one-time stuff over at this point
|
|
***************************************************************/
|
|
|
|
while (!endflag) {
|
|
while ((!dayflag) || (!dhourflag)) {
|
|
dayflag = 0;
|
|
dhourflag = 0;
|
|
if (nstep == 1) {
|
|
if (procid == MASTER) {
|
|
CLOCK(global->trackstart)
|
|
}
|
|
if ((procid == MASTER) || (do_stats)) {
|
|
CLOCK(t1);
|
|
gp[procid].total_time = t1;
|
|
gp[procid].multi_time = 0;
|
|
}
|
|
/* POSSIBLE ENHANCEMENT: Here is where one might reset the
|
|
statistics that one is measuring about the parallel execution */
|
|
}
|
|
|
|
slave2(procid,firstrow,lastrow,numrows,firstcol,lastcol,numcols);
|
|
|
|
/* update time and step number
|
|
note that these time and step variables are private i.e. every
|
|
process has its own copy and keeps track of its own time */
|
|
|
|
ttime = ttime + dtau;
|
|
nstep = nstep + 1;
|
|
day = ttime/86400.0;
|
|
|
|
if (day > ((double) outday0)) {
|
|
dayflag = 1;
|
|
iday = (long) day;
|
|
dhour = dhour+dtau;
|
|
if (dhour >= 86400.0) {
|
|
dhourflag = 1;
|
|
}
|
|
}
|
|
}
|
|
dhour = 0.0;
|
|
|
|
t2a = (double **) psium[procid];
|
|
t2b = (double **) psim[procid][0];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0] = t2a[0][0]+t2b[0][0];
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0] = t2a[im-1][0]+t2b[im-1][0];
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1] = t2a[0][jm-1]+t2b[0][jm-1];
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1] = t2a[im-1][jm-1] +
|
|
t2b[im-1][jm-1];
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
t1b = (double *) t2b[0];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = t1a[j]+t1b[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
t1b = (double *) t2b[im-1];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = t1a[j] + t1b[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = t2a[j][0]+t2b[j][0];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = t2a[j][jm-1] +
|
|
t2b[j][jm-1];
|
|
}
|
|
}
|
|
for(i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
t1b = (double *) t2b[i];
|
|
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = t1a[iindex] + t1b[iindex];
|
|
}
|
|
}
|
|
|
|
/* update values of psilm array to psilm + psim[2] */
|
|
|
|
t2a = (double **) psilm[procid];
|
|
t2b = (double **) psim[procid][1];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0] = t2a[0][0]+t2b[0][0];
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0] = t2a[im-1][0]+t2b[im-1][0];
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1] = t2a[0][jm-1]+t2b[0][jm-1];
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1] = t2a[im-1][jm-1] +
|
|
t2b[im-1][jm-1];
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
t1b = (double *) t2b[0];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = t1a[j]+t1b[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
t1b = (double *) t2b[im-1];
|
|
for(j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = t1a[j]+t1b[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = t2a[j][0]+t2b[j][0];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for(j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = t2a[j][jm-1] + t2b[j][jm-1];
|
|
}
|
|
}
|
|
for(i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
t1b = (double *) t2b[i];
|
|
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = t1a[iindex] + t1b[iindex];
|
|
}
|
|
}
|
|
if (iday >= (long) outday3) {
|
|
endflag = 1;
|
|
}
|
|
}
|
|
if ((procid == MASTER) || (do_stats)) {
|
|
CLOCK(t1);
|
|
gp[procid].total_time = t1-gp[procid].total_time;
|
|
}
|
|
}
|
|
|