Sanchayan Maity
2fcc51c2c1
While at it also add the libpthread static library and m5op_x86 for matrix multiplication test code as well. Note that the splash2 benchmark code does not comply with gem5 coding guidelines. Academic guys never seem to follow 80 columns and no whitespace guideline :(.
832 lines
24 KiB
C
832 lines
24 KiB
C
/*************************************************************************/
|
|
/* */
|
|
/* Copyright (c) 1994 Stanford University */
|
|
/* */
|
|
/* All rights reserved. */
|
|
/* */
|
|
/* Permission is given to use, copy, and modify this software for any */
|
|
/* non-commercial purpose as long as this copyright notice is not */
|
|
/* removed. All other uses, including redistribution in whole or in */
|
|
/* part, are forbidden without prior written permission. */
|
|
/* */
|
|
/* This software is provided with absolutely no warranty and no */
|
|
/* support. */
|
|
/* */
|
|
/*************************************************************************/
|
|
|
|
/* ****************
|
|
subroutine slave
|
|
**************** */
|
|
|
|
EXTERN_ENV
|
|
|
|
#include <cmath>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <ctime>
|
|
|
|
#include "decs.h"
|
|
|
|
void slave()
|
|
{
|
|
long i;
|
|
long j;
|
|
long nstep;
|
|
long iindex;
|
|
long iday;
|
|
double ysca1;
|
|
double y;
|
|
double factor;
|
|
double sintemp;
|
|
double curlt;
|
|
double ressqr;
|
|
long istart;
|
|
long iend;
|
|
long jstart;
|
|
long jend;
|
|
long ist;
|
|
long ien;
|
|
long jst;
|
|
long jen;
|
|
double fac;
|
|
long dayflag=0;
|
|
long dhourflag=0;
|
|
long endflag=0;
|
|
long firstrow;
|
|
long lastrow;
|
|
long numrows;
|
|
long firstcol;
|
|
long lastcol;
|
|
long numcols;
|
|
long psiindex;
|
|
double psibipriv;
|
|
double ttime;
|
|
double dhour;
|
|
double day;
|
|
long procid;
|
|
long psinum;
|
|
long j_off = 0;
|
|
unsigned long t1;
|
|
double **t2a;
|
|
double **t2b;
|
|
double *t1a;
|
|
double *t1b;
|
|
double *t1c;
|
|
double *t1d;
|
|
|
|
ressqr = lev_res[numlev-1] * lev_res[numlev-1];
|
|
|
|
LOCK(locks->idlock)
|
|
procid = global->id;
|
|
global->id = global->id+1;
|
|
UNLOCK(locks->idlock)
|
|
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->sl_prini,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
|
|
processors to avoid migration. */
|
|
|
|
/* POSSIBLE ENHANCEMENT: Here is where one might distribute
|
|
data structures across physically distributed memories as
|
|
desired.
|
|
|
|
One way to do this is as follows. The function allocate(START,SIZE,I)
|
|
is assumed to place all addresses x such that
|
|
(START <= x < START+SIZE) on node I.
|
|
|
|
long d_size;
|
|
unsigned long g_size;
|
|
unsigned long mg_size;
|
|
|
|
if (procid == MASTER) {
|
|
g_size = ((jmx[numlev-1]-2)/xprocs+2)*((imx[numlev-1]-2)/yprocs+2)*siz
|
|
eof(double) +
|
|
((imx[numlev-1]-2)/yprocs+2)*sizeof(double *);
|
|
|
|
mg_size = numlev*sizeof(double **);
|
|
for (i=0;i<numlev;i++) {
|
|
mg_size+=((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
|
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
|
}
|
|
for (i= 0;i<nprocs;i++) {
|
|
d_size = 2*sizeof(double **);
|
|
allocate((unsigned long) psi[i],d_size,i);
|
|
allocate((unsigned long) psim[i],d_size,i);
|
|
allocate((unsigned long) work1[i],d_size,i);
|
|
allocate((unsigned long) work4[i],d_size,i);
|
|
allocate((unsigned long) work5[i],d_size,i);
|
|
allocate((unsigned long) work7[i],d_size,i);
|
|
allocate((unsigned long) temparray[i],d_size,i);
|
|
allocate((unsigned long) psi[i][0],g_size,i);
|
|
allocate((unsigned long) psi[i][1],g_size,i);
|
|
allocate((unsigned long) psim[i][0],g_size,i);
|
|
allocate((unsigned long) psim[i][1],g_size,i);
|
|
allocate((unsigned long) psium[i],g_size,i);
|
|
allocate((unsigned long) psilm[i],g_size,i);
|
|
allocate((unsigned long) psib[i],g_size,i);
|
|
allocate((unsigned long) ga[i],g_size,i);
|
|
allocate((unsigned long) gb[i],g_size,i);
|
|
allocate((unsigned long) work1[i][0],g_size,i);
|
|
allocate((unsigned long) work1[i][1],g_size,i);
|
|
allocate((unsigned long) work2[i],g_size,i);
|
|
allocate((unsigned long) work3[i],g_size,i);
|
|
allocate((unsigned long) work4[i][0],g_size,i);
|
|
allocate((unsigned long) work4[i][1],g_size,i);
|
|
allocate((unsigned long) work5[i][0],g_size,i);
|
|
allocate((unsigned long) work5[i][1],g_size,i);
|
|
allocate((unsigned long) work6[i],g_size,i);
|
|
allocate((unsigned long) work7[i][0],g_size,i);
|
|
allocate((unsigned long) work7[i][1],g_size,i);
|
|
allocate((unsigned long) temparray[i][0],g_size,i);
|
|
allocate((unsigned long) temparray[i][1],g_size,i);
|
|
allocate((unsigned long) tauz[i],g_size,i);
|
|
allocate((unsigned long) oldga[i],g_size,i);
|
|
allocate((unsigned long) oldgb[i],g_size,i);
|
|
d_size = numlev * sizeof(long);
|
|
allocate((unsigned long) gp[i].rel_num_x,d_size,i);
|
|
allocate((unsigned long) gp[i].rel_num_y,d_size,i);
|
|
allocate((unsigned long) gp[i].eist,d_size,i);
|
|
allocate((unsigned long) gp[i].ejst,d_size,i);
|
|
allocate((unsigned long) gp[i].oist,d_size,i);
|
|
allocate((unsigned long) gp[i].ojst,d_size,i);
|
|
allocate((unsigned long) gp[i].rlist,d_size,i);
|
|
allocate((unsigned long) gp[i].rljst,d_size,i);
|
|
allocate((unsigned long) gp[i].rlien,d_size,i);
|
|
allocate((unsigned long) gp[i].rljen,d_size,i);
|
|
|
|
allocate((unsigned long) q_multi[i],mg_size,i);
|
|
allocate((unsigned long) rhs_multi[i],mg_size,i);
|
|
allocate((unsigned long) &(gp[i]),sizeof(struct Global_Private),i);
|
|
}
|
|
}
|
|
|
|
*/
|
|
|
|
t2a = (double **) oldga[procid];
|
|
t2b = (double **) oldgb[procid];
|
|
for (i=0;i<im;i++) {
|
|
t1a = (double *) t2a[i];
|
|
t1b = (double *) t2b[i];
|
|
for (j=0;j<jm;j++) {
|
|
t1a[j] = 0.0;
|
|
t1b[j] = 0.0;
|
|
}
|
|
}
|
|
|
|
firstcol = 1;
|
|
lastcol = firstcol + gp[procid].rel_num_x[numlev-1] - 1;
|
|
firstrow = 1;
|
|
lastrow = firstrow + gp[procid].rel_num_y[numlev-1] - 1;
|
|
numcols = gp[procid].rel_num_x[numlev-1];
|
|
numrows = gp[procid].rel_num_y[numlev-1];
|
|
j_off = gp[procid].colnum*numcols;
|
|
|
|
if (procid > nprocs/2) {
|
|
psinum = 2;
|
|
} else {
|
|
psinum = 1;
|
|
}
|
|
|
|
/* every process gets its own copy of the timing variables to avoid
|
|
contention at shared memory locations. here, these variables
|
|
are initialized. */
|
|
|
|
ttime = 0.0;
|
|
dhour = 0.0;
|
|
nstep = 0 ;
|
|
day = 0.0;
|
|
|
|
ysca1 = 0.5*ysca;
|
|
if (procid == MASTER) {
|
|
t1a = (double *) f;
|
|
for (iindex = 0;iindex<=jmx[numlev-1]-1;iindex++) {
|
|
y = ((double) iindex)*res;
|
|
t1a[iindex] = f0+beta*(y-ysca1);
|
|
}
|
|
}
|
|
|
|
t2a = (double **) psium[procid];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0]=0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0]=0.0;
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1]=0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1]=0.0;
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = 0.0;
|
|
}
|
|
}
|
|
|
|
for (i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = 0.0;
|
|
}
|
|
}
|
|
t2a = (double **) psilm[procid];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0]=0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0]=0.0;
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1]=0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1]=0.0;
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = 0.0;
|
|
}
|
|
}
|
|
for (i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = 0.0;
|
|
}
|
|
}
|
|
|
|
t2a = (double **) psib[procid];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0]=1.0;
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1]=1.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0]=1.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1]=1.0;
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 1.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 1.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = 1.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = 1.0;
|
|
}
|
|
}
|
|
for (i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = 0.0;
|
|
}
|
|
}
|
|
|
|
/* wait until all processes have completed the above initialization */
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->sl_prini,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
/* compute psib array (one-time computation) and integrate into psibi */
|
|
|
|
istart = 1;
|
|
iend = istart + gp[procid].rel_num_y[numlev-1] - 1;
|
|
jstart = 1;
|
|
jend = jstart + gp[procid].rel_num_x[numlev-1] - 1;
|
|
ist = istart;
|
|
ien = iend;
|
|
jst = jstart;
|
|
jen = jend;
|
|
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
istart = 0;
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
jstart = 0;
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
iend = im-1;
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
jend = jm-1;
|
|
}
|
|
|
|
t2a = (double **) rhs_multi[procid][numlev-1];
|
|
t2b = (double **) psib[procid];
|
|
for (i=istart;i<=iend;i++) {
|
|
t1a = (double *) t2a[i];
|
|
t1b = (double *) t2b[i];
|
|
for (j=jstart;j<=jend;j++) {
|
|
t1a[j] = t1b[j] * ressqr;
|
|
}
|
|
}
|
|
t2a = (double **) q_multi[procid][numlev-1];
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
t1b = (double *) t2b[0];
|
|
for (j=jstart;j<=jend;j++) {
|
|
t1a[j] = t1b[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
t1b = (double *) t2b[im-1];
|
|
for (j=jstart;j<=jend;j++) {
|
|
t1a[j] = t1b[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for (i=istart;i<=iend;i++) {
|
|
t2a[i][0] = t2b[i][0];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for (i=istart;i<=iend;i++) {
|
|
t2a[i][jm-1] = t2b[i][jm-1];
|
|
}
|
|
}
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->sl_psini,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
t2a = (double **) psib[procid];
|
|
j = gp[procid].neighbors[UP];
|
|
if (j != -1) {
|
|
t1a = (double *) t2a[0];
|
|
t1b = (double *) psib[j][im-2];
|
|
for (i=1;i<jm-1;i++) {
|
|
t1a[i] = t1b[i];
|
|
}
|
|
}
|
|
j = gp[procid].neighbors[DOWN];
|
|
if (j != -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
t1b = (double *) psib[j][1];
|
|
for (i=1;i<jm-1;i++) {
|
|
t1a[i] = t1b[i];
|
|
}
|
|
}
|
|
j = gp[procid].neighbors[LEFT];
|
|
if (j != -1) {
|
|
t2b = (double **) psib[j];
|
|
for (i=1;i<im-1;i++) {
|
|
t2a[i][0] = t2b[i][jm-2];
|
|
}
|
|
}
|
|
j = gp[procid].neighbors[RIGHT];
|
|
if (j != -1) {
|
|
t2b = (double **) psib[j];
|
|
for (i=1;i<im-1;i++) {
|
|
t2a[i][jm-1] = t2b[i][1];
|
|
}
|
|
}
|
|
|
|
t2a = (double **) q_multi[procid][numlev-1];
|
|
t2b = (double **) psib[procid];
|
|
fac = 1.0 / (4.0 - ressqr*eig2);
|
|
for (i=ist;i<=ien;i++) {
|
|
t1a = (double *) t2a[i];
|
|
t1b = (double *) t2b[i];
|
|
t1c = (double *) t2b[i-1];
|
|
t1d = (double *) t2b[i+1];
|
|
for (j=jst;j<=jen;j++) {
|
|
t1a[j] = fac * (t1d[j]+t1c[j]+t1b[j+1]+t1b[j-1] -
|
|
ressqr*t1b[j]);
|
|
}
|
|
}
|
|
|
|
multig(procid);
|
|
|
|
for (i=istart;i<=iend;i++) {
|
|
t1a = (double *) t2a[i];
|
|
t1b = (double *) t2b[i];
|
|
for (j=jstart;j<=jend;j++) {
|
|
t1b[j] = t1a[j];
|
|
}
|
|
}
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->sl_prini,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
/* update the local running sum psibipriv by summing all the resulting
|
|
values in that process's share of the psib matrix */
|
|
|
|
t2a = (double **) psib[procid];
|
|
psibipriv=0.0;
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
psibipriv = psibipriv + 0.25*(t2a[0][0]);
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
psibipriv = psibipriv + 0.25*(t2a[0][jm-1]);
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
psibipriv=psibipriv+0.25*(t2a[im-1][0]);
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
psibipriv=psibipriv+0.25*(t2a[im-1][jm-1]);
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
psibipriv = psibipriv + 0.5*t1a[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
psibipriv = psibipriv + 0.5*t1a[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
psibipriv = psibipriv + 0.5*t2a[j][0];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
psibipriv = psibipriv + 0.5*t2a[j][jm-1];
|
|
}
|
|
}
|
|
for (i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
psibipriv = psibipriv + t1a[iindex];
|
|
}
|
|
}
|
|
|
|
/* update the shared variable psibi by summing all the psibiprivs
|
|
of the individual processes into it. note that this combined
|
|
private and shared sum method avoids accessing the shared
|
|
variable psibi once for every element of the matrix. */
|
|
|
|
LOCK(locks->psibilock)
|
|
global->psibi = global->psibi + psibipriv;
|
|
UNLOCK(locks->psibilock)
|
|
|
|
/* initialize psim matrices
|
|
|
|
if there is more than one process, then split the processes
|
|
between the two psim matrices; otherwise, let the single process
|
|
work on one first and then the other */
|
|
|
|
for (psiindex=0;psiindex<=1;psiindex++) {
|
|
t2a = (double **) psim[procid][psiindex];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1] = 0.0;
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = 0.0;
|
|
}
|
|
}
|
|
for (i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = 0.0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* initialize psi matrices the same way */
|
|
|
|
for (psiindex=0;psiindex<=1;psiindex++) {
|
|
t2a = (double **) psi[procid][psiindex];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1] = 0.0;
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = 0.0;
|
|
}
|
|
}
|
|
for (i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = 0.0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* compute input curl of wind stress */
|
|
|
|
t2a = (double **) tauz[procid];
|
|
ysca1 = .5*ysca;
|
|
factor= -t0*pi/ysca1;
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0] = 0.0;
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
sintemp = pi*((double) jm-1+j_off)*res/ysca1;
|
|
sintemp = sin(sintemp);
|
|
t2a[0][jm-1] = factor*sintemp;
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
sintemp = pi*((double) jm-1+j_off)*res/ysca1;
|
|
sintemp = sin(sintemp);
|
|
t2a[im-1][jm-1] = factor*sintemp;
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
sintemp = pi*((double) j+j_off)*res/ysca1;
|
|
sintemp = sin(sintemp);
|
|
curlt = factor*sintemp;
|
|
t1a[j] = curlt;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
sintemp = pi*((double) j+j_off)*res/ysca1;
|
|
sintemp = sin(sintemp);
|
|
curlt = factor*sintemp;
|
|
t1a[j] = curlt;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = 0.0;
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
sintemp = pi*((double) jm-1+j_off)*res/ysca1;
|
|
sintemp = sin(sintemp);
|
|
curlt = factor*sintemp;
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = curlt;
|
|
}
|
|
}
|
|
for (i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
sintemp = pi*((double) iindex+j_off)*res/ysca1;
|
|
sintemp = sin(sintemp);
|
|
curlt = factor*sintemp;
|
|
t1a[iindex] = curlt;
|
|
}
|
|
}
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->sl_onetime,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
|
|
/***************************************************************
|
|
one-time stuff over at this point
|
|
***************************************************************/
|
|
|
|
while (!endflag) {
|
|
while ((!dayflag) || (!dhourflag)) {
|
|
dayflag = 0;
|
|
dhourflag = 0;
|
|
if (nstep == 1) {
|
|
if (procid == MASTER) {
|
|
CLOCK(global->trackstart)
|
|
}
|
|
if ((procid == MASTER) || (do_stats)) {
|
|
CLOCK(t1);
|
|
gp[procid].total_time = t1;
|
|
gp[procid].multi_time = 0;
|
|
}
|
|
/* POSSIBLE ENHANCEMENT: Here is where one might reset the
|
|
statistics that one is measuring about the parallel execution */
|
|
}
|
|
|
|
slave2(procid,firstrow,lastrow,numrows,firstcol,lastcol,numcols);
|
|
|
|
/* update time and step number
|
|
note that these time and step variables are private i.e. every
|
|
process has its own copy and keeps track of its own time */
|
|
|
|
ttime = ttime + dtau;
|
|
nstep = nstep + 1;
|
|
day = ttime/86400.0;
|
|
|
|
if (day > ((double) outday0)) {
|
|
dayflag = 1;
|
|
iday = (long) day;
|
|
dhour = dhour+dtau;
|
|
if (dhour >= 86400.0) {
|
|
dhourflag = 1;
|
|
}
|
|
}
|
|
}
|
|
dhour = 0.0;
|
|
|
|
t2a = (double **) psium[procid];
|
|
t2b = (double **) psim[procid][0];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0] = t2a[0][0]+t2b[0][0];
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0] = t2a[im-1][0]+t2b[im-1][0];
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1] = t2a[0][jm-1]+t2b[0][jm-1];
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1] = t2a[im-1][jm-1] +
|
|
t2b[im-1][jm-1];
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
t1b = (double *) t2b[0];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = t1a[j]+t1b[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
t1b = (double *) t2b[im-1];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = t1a[j] + t1b[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = t2a[j][0]+t2b[j][0];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = t2a[j][jm-1] +
|
|
t2b[j][jm-1];
|
|
}
|
|
}
|
|
for (i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
t1b = (double *) t2b[i];
|
|
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = t1a[iindex] + t1b[iindex];
|
|
}
|
|
}
|
|
|
|
/* update values of psilm array to psilm + psim[2] */
|
|
|
|
t2a = (double **) psilm[procid];
|
|
t2b = (double **) psim[procid][1];
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[0][0] = t2a[0][0]+t2b[0][0];
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
t2a[im-1][0] = t2a[im-1][0]+t2b[im-1][0];
|
|
}
|
|
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[0][jm-1] = t2a[0][jm-1]+t2b[0][jm-1];
|
|
}
|
|
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
t2a[im-1][jm-1] = t2a[im-1][jm-1] +
|
|
t2b[im-1][jm-1];
|
|
}
|
|
if (gp[procid].neighbors[UP] == -1) {
|
|
t1a = (double *) t2a[0];
|
|
t1b = (double *) t2b[0];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = t1a[j]+t1b[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[DOWN] == -1) {
|
|
t1a = (double *) t2a[im-1];
|
|
t1b = (double *) t2b[im-1];
|
|
for (j=firstcol;j<=lastcol;j++) {
|
|
t1a[j] = t1a[j]+t1b[j];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[LEFT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][0] = t2a[j][0]+t2b[j][0];
|
|
}
|
|
}
|
|
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
for (j=firstrow;j<=lastrow;j++) {
|
|
t2a[j][jm-1] = t2a[j][jm-1] + t2b[j][jm-1];
|
|
}
|
|
}
|
|
for (i=firstrow;i<=lastrow;i++) {
|
|
t1a = (double *) t2a[i];
|
|
t1b = (double *) t2b[i];
|
|
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
t1a[iindex] = t1a[iindex] + t1b[iindex];
|
|
}
|
|
}
|
|
if (iday >= (long) outday3) {
|
|
endflag = 1;
|
|
}
|
|
}
|
|
if ((procid == MASTER) || (do_stats)) {
|
|
CLOCK(t1);
|
|
gp[procid].total_time = t1-gp[procid].total_time;
|
|
}
|
|
}
|
|
|