gem5/splash2/codes/apps/barnes/code.C

923 lines
28 KiB
C
Raw Permalink Normal View History

/*************************************************************************/
/* */
/* Copyright (c) 1994 Stanford University */
/* */
/* All rights reserved. */
/* */
/* Permission is given to use, copy, and modify this software for any */
/* non-commercial purpose as long as this copyright notice is not */
/* removed. All other uses, including redistribution in whole or in */
/* part, are forbidden without prior written permission. */
/* */
/* This software is provided with absolutely no warranty and no */
/* support. */
/* */
/*************************************************************************/
/*
Usage: BARNES <options> < inputfile
Command line options:
-h : Print out input file description
Input parameters should be placed in a file and redirected through
standard input. There are a total of twelve parameters, and all of
them have default values.
1) infile (char*) : The name of an input file that contains particle
data.
The format of the file is:
a) An int representing the number of particles in the distribution
b) An int representing the dimensionality of the problem (3-D)
c) A double representing the current time of the simulation
d) Doubles representing the masses of all the particles
e) A vector (length equal to the dimensionality) of doubles
representing the positions of all the particles
f) A vector (length equal to the dimensionality) of doubles
representing the velocities of all the particles
Each of these numbers can be separated by any amount of whitespace.
2) nbody (int) : If no input file is specified (the first line is
blank), this number specifies the number of particles to generate
under a plummer model. Default is 16384.
3) seed (int) : The seed used by the random number generator.
Default is 123.
4) outfile (char*) : The name of the file that snapshots will be
printed to. This feature has been disabled in the SPLASH release.
Default is NULL.
5) dtime (double) : The integration time-step.
Default is 0.025.
6) eps (double) : The usual potential softening
Default is 0.05.
7) tol (double) : The cell subdivision tolerance.
Default is 1.0.
8) fcells (double) : Number of cells created = fcells * number of
leaves.
Default is 2.0.
9) fleaves (double) : Number of leaves created = fleaves * nbody.
Default is 0.5.
10) tstop (double) : The time to stop integration.
Default is 0.075.
11) dtout (double) : The data-output interval.
Default is 0.25.
12) NPROC (int) : The number of processors.
Default is 1.
*/
MAIN_ENV
#define global /* nada */
#include "stdinc.h"
string defv[] = { /* DEFAULT PARAMETER VALUES */
/* file names for input/output */
"in=", /* snapshot of initial conditions */
"out=", /* stream of output snapshots */
/* params, used if no input specified, to make a Plummer Model */
"nbody=16384", /* number of particles to generate */
"seed=123", /* random number generator seed */
/* params to control N-body integration */
"dtime=0.025", /* integration time-step */
"eps=0.05", /* usual potential softening */
"tol=1.0", /* cell subdivision tolerence */
"fcells=2.0", /* cell allocation parameter */
"fleaves=0.5", /* leaf allocation parameter */
"tstop=0.075", /* time to stop integration */
"dtout=0.25", /* data-output interval */
"NPROC=1", /* number of processors */
};
/* The more complicated 3D case */
#define NUM_DIRECTIONS 32
#define BRC_FUC 0
#define BRC_FRA 1
#define BRA_FDA 2
#define BRA_FRC 3
#define BLC_FDC 4
#define BLC_FLA 5
#define BLA_FUA 6
#define BLA_FLC 7
#define BUC_FUA 8
#define BUC_FLC 9
#define BUA_FUC 10
#define BUA_FRA 11
#define BDC_FDA 12
#define BDC_FRC 13
#define BDA_FDC 14
#define BDA_FLA 15
#define FRC_BUC 16
#define FRC_BRA 17
#define FRA_BDA 18
#define FRA_BRC 19
#define FLC_BDC 20
#define FLC_BLA 21
#define FLA_BUA 22
#define FLA_BLC 23
#define FUC_BUA 24
#define FUC_BLC 25
#define FUA_BUC 26
#define FUA_BRA 27
#define FDC_BDA 28
#define FDC_BRC 29
#define FDA_BDC 30
#define FDA_BLA 31
static long Child_Sequence[NUM_DIRECTIONS][NSUB] =
{
{ 2, 5, 6, 1, 0, 3, 4, 7}, /* BRC_FUC */
{ 2, 5, 6, 1, 0, 7, 4, 3}, /* BRC_FRA */
{ 1, 6, 5, 2, 3, 0, 7, 4}, /* BRA_FDA */
{ 1, 6, 5, 2, 3, 4, 7, 0}, /* BRA_FRC */
{ 6, 1, 2, 5, 4, 7, 0, 3}, /* BLC_FDC */
{ 6, 1, 2, 5, 4, 3, 0, 7}, /* BLC_FLA */
{ 5, 2, 1, 6, 7, 4, 3, 0}, /* BLA_FUA */
{ 5, 2, 1, 6, 7, 0, 3, 4}, /* BLA_FLC */
{ 1, 2, 5, 6, 7, 4, 3, 0}, /* BUC_FUA */
{ 1, 2, 5, 6, 7, 0, 3, 4}, /* BUC_FLC */
{ 6, 5, 2, 1, 0, 3, 4, 7}, /* BUA_FUC */
{ 6, 5, 2, 1, 0, 7, 4, 3}, /* BUA_FRA */
{ 5, 6, 1, 2, 3, 0, 7, 4}, /* BDC_FDA */
{ 5, 6, 1, 2, 3, 4, 7, 0}, /* BDC_FRC */
{ 2, 1, 6, 5, 4, 7, 0, 3}, /* BDA_FDC */
{ 2, 1, 6, 5, 4, 3, 0, 7}, /* BDA_FLA */
{ 3, 4, 7, 0, 1, 2, 5, 6}, /* FRC_BUC */
{ 3, 4, 7, 0, 1, 6, 5, 2}, /* FRC_BRA */
{ 0, 7, 4, 3, 2, 1, 6, 5}, /* FRA_BDA */
{ 0, 7, 4, 3, 2, 5, 6, 1}, /* FRA_BRC */
{ 7, 0, 3, 4, 5, 6, 1, 2}, /* FLC_BDC */
{ 7, 0, 3, 4, 5, 2, 1, 6}, /* FLC_BLA */
{ 4, 3, 0, 7, 6, 5, 2, 1}, /* FLA_BUA */
{ 4, 3, 0, 7, 6, 1, 2, 5}, /* FLA_BLC */
{ 0, 3, 4, 7, 6, 5, 2, 1}, /* FUC_BUA */
{ 0, 3, 4, 7, 6, 1, 2, 5}, /* FUC_BLC */
{ 7, 4, 3, 0, 1, 2, 5, 6}, /* FUA_BUC */
{ 7, 4, 3, 0, 1, 6, 5, 2}, /* FUA_BRA */
{ 4, 7, 0, 3, 2, 1, 6, 5}, /* FDC_BDA */
{ 4, 7, 0, 3, 2, 5, 6, 1}, /* FDC_BRC */
{ 3, 0, 7, 4, 5, 6, 1, 2}, /* FDA_BDC */
{ 3, 0, 7, 4, 5, 2, 1, 6}, /* FDA_BLA */
};
static long Direction_Sequence[NUM_DIRECTIONS][NSUB] =
{
{ FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
/* BRC_FUC */
{ FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA, BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC },
/* BRC_FRA */
{ FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC, BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC },
/* BRA_FDA */
{ FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
/* BRA_FRC */
{ FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA, BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA },
/* BLC_FDC */
{ FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA, BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC },
/* BLC_FLA */
{ FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC, BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC },
/* BLA_FUA */
{ FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC, BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA },
/* BLA_FLC */
{ FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA, BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC },
/* BUC_FUA */
{ FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA, BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA },
/* BUC_FLC */
{ FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
/* BUA_FUC */
{ FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC, BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC },
/* BUA_FRA */
{ FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA, BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC },
/* BDC_FDA */
{ FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
/* BDC_FRC */
{ FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC, BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA },
/* BDA_FDC */
{ FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC, BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC },
/* BDA_FLA */
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA },
/* FRC_BUC */
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC },
/* FRC_BRA */
{ BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC, FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC },
/* FRA_BDA */
{ BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC, FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA },
/* FRA_BRC */
{ BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA, FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA },
/* FLC_BDC */
{ BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA, FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC },
/* FLC_BLA */
{ BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC, FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC },
/* FLA_BUA */
{ BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC, FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA },
/* FLA_BLC */
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC },
/* FUC_BUA */
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA },
/* FUC_BLC */
{ BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC, FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA },
/* FUA_BUC */
{ BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC, FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC },
/* FUA_BRA */
{ BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA, FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC },
/* FDC_BDA */
{ BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA, FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA },
/* FDC_BRC */
{ BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC, FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA },
/* FDA_BDC */
{ BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC, FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC },
/* FDA_BLA */
};
int main (int argc, string argv[])
{
long c;
while ((c = getopt(argc, argv, "h")) != -1) {
switch(c) {
case 'h':
Help();
exit(-1);
break;
default:
fprintf(stderr, "Only valid option is \"-h\".\n");
exit(-1);
break;
}
}
Global = NULL;
initparam(defv);
startrun();
initoutput();
tab_init();
Global->tracktime = 0;
Global->partitiontime = 0;
Global->treebuildtime = 0;
Global->forcecalctime = 0;
Global->current_id = 0;
CLOCK(Global->computestart);
printf("COMPUTESTART = %12lu\n",Global->computestart);
CREATE(SlaveStart, NPROC);
WAIT_FOR_END(NPROC);
CLOCK(Global->computeend);
printf("COMPUTEEND = %12lu\n",Global->computeend);
printf("COMPUTETIME = %12lu\n",Global->computeend - Global->computestart);
printf("TRACKTIME = %12lu\n",Global->tracktime);
printf("PARTITIONTIME = %12lu\t%5.2f\n",Global->partitiontime,
((float)Global->partitiontime)/Global->tracktime);
printf("TREEBUILDTIME = %12lu\t%5.2f\n",Global->treebuildtime,
((float)Global->treebuildtime)/Global->tracktime);
printf("FORCECALCTIME = %12lu\t%5.2f\n",Global->forcecalctime,
((float)Global->forcecalctime)/Global->tracktime);
printf("RESTTIME = %12lu\t%5.2f\n",
Global->tracktime - Global->partitiontime -
Global->treebuildtime - Global->forcecalctime,
((float)(Global->tracktime-Global->partitiontime-
Global->treebuildtime-Global->forcecalctime))/
Global->tracktime);
MAIN_END;
}
/*
* ANLINIT : initialize ANL macros
*/
void ANLinit()
{
MAIN_INITENV(,70000000,);
/* Allocate global, shared memory */
Global = (struct GlobalMemory *) G_MALLOC(sizeof(struct GlobalMemory));
if (Global==NULL) error("No initialization for Global\n");
BARINIT(Global->Barrier, NPROC);
LOCKINIT(Global->CountLock);
LOCKINIT(Global->io_lock);
}
/*
* INIT_ROOT: Processor 0 reinitialize the global root at each time step
*/
void init_root()
{
long i;
Global->G_root=Local[0].ctab;
Global->G_root->seqnum = 0;
Type(Global->G_root) = CELL;
Done(Global->G_root) = FALSE;
Level(Global->G_root) = IMAX >> 1;
for (i = 0; i < NSUB; i++) {
Subp(Global->G_root)[i] = NULL;
}
Local[0].mynumcell=1;
}
long Log_base_2(long number)
{
long cumulative;
long out;
cumulative = 1;
for (out = 0; out < 20; out++) {
if (cumulative == number) {
return(out);
}
else {
cumulative = cumulative * 2;
}
}
fprintf(stderr,"Log_base_2: couldn't find log2 of %ld\n", number);
exit(-1);
}
/*
* TAB_INIT : allocate body and cell data space
*/
void tab_init()
{
long i;
/*allocate leaf/cell space */
maxleaf = (long) ((double) fleaves * nbody);
maxcell = fcells * maxleaf;
for (i = 0; i < NPROC; ++i) {
Local[i].ctab = (cellptr) G_MALLOC((maxcell / NPROC) * sizeof(cell));
Local[i].ltab = (leafptr) G_MALLOC((maxleaf / NPROC) * sizeof(leaf));
}
/*allocate space for personal lists of body pointers */
maxmybody = (nbody+maxleaf*MAX_BODIES_PER_LEAF)/NPROC;
Local[0].mybodytab = (bodyptr*) G_MALLOC(NPROC*maxmybody*sizeof(bodyptr));
/* space is allocated so that every */
/* process can have a maximum of maxmybody pointers to bodies */
/* then there is an array of bodies called bodytab which is */
/* allocated in the distribution generation or when the distr. */
/* file is read */
maxmycell = maxcell / NPROC;
maxmyleaf = maxleaf / NPROC;
Local[0].mycelltab = (cellptr*) G_MALLOC(NPROC*maxmycell*sizeof(cellptr));
Local[0].myleaftab = (leafptr*) G_MALLOC(NPROC*maxmyleaf*sizeof(leafptr));
CellLock = (struct CellLockType *) G_MALLOC(sizeof(struct CellLockType));
ALOCKINIT(CellLock->CL,MAXLOCK);
}
/*
* SLAVESTART: main task for each processor
*/
void SlaveStart()
{
long ProcessId;
/* Get unique ProcessId */
LOCK(Global->CountLock);
ProcessId = Global->current_id++;
UNLOCK(Global->CountLock);
BARINCLUDE(Global->Barrier);
/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
processors to avoid migration */
/* initialize mybodytabs */
Local[ProcessId].mybodytab = Local[0].mybodytab + (maxmybody * ProcessId);
/* note that every process has its own copy */
/* of mybodytab, which was initialized to the */
/* beginning of the whole array by proc. 0 */
/* before create */
Local[ProcessId].mycelltab = Local[0].mycelltab + (maxmycell * ProcessId);
Local[ProcessId].myleaftab = Local[0].myleaftab + (maxmyleaf * ProcessId);
/* POSSIBLE ENHANCEMENT: Here is where one might distribute the
data across physically distributed memories as desired.
One way to do this is as follows:
long i;
if (ProcessId == 0) {
for (i=0;i<NPROC;i++) {
Place all addresses x such that
&(Local[i]) <= x < &(Local[i])+
sizeof(struct local_memory) on node i
Place all addresses x such that
&(Local[i].mybodytab) <= x < &(Local[i].mybodytab)+
maxmybody * sizeof(bodyptr) - 1 on node i
Place all addresses x such that
&(Local[i].mycelltab) <= x < &(Local[i].mycelltab)+
maxmycell * sizeof(cellptr) - 1 on node i
Place all addresses x such that
&(Local[i].myleaftab) <= x < &(Local[i].myleaftab)+
maxmyleaf * sizeof(leafptr) - 1 on node i
}
}
barrier(Global->Barstart,NPROC);
*/
Local[ProcessId].tout = Local[0].tout;
Local[ProcessId].tnow = Local[0].tnow;
Local[ProcessId].nstep = Local[0].nstep;
find_my_initial_bodies(bodytab, nbody, ProcessId);
/* main loop */
while (Local[ProcessId].tnow < tstop + 0.1 * dtime) {
stepsystem(ProcessId);
// printtree(Global->G_root);
// printf("Going to next step!!!\n");
}
}
/*
* STARTRUN: startup hierarchical N-body code.
*/
void startrun()
{
long seed;
infile = getparam("in");
if (*infile != '\0'/*NULL*/) {
inputdata();
}
else {
nbody = getiparam("nbody");
if (nbody < 1) {
error("startrun: absurd nbody\n");
}
seed = getiparam("seed");
}
outfile = getparam("out");
dtime = getdparam("dtime");
dthf = 0.5 * dtime;
eps = getdparam("eps");
epssq = eps*eps;
tol = getdparam("tol");
tolsq = tol*tol;
fcells = getdparam("fcells");
fleaves = getdparam("fleaves");
tstop = getdparam("tstop");
dtout = getdparam("dtout");
NPROC = getiparam("NPROC");
Local[0].nstep = 0;
pranset(seed);
testdata();
ANLinit();
setbound();
Local[0].tout = Local[0].tnow + dtout;
}
/*
* TESTDATA: generate Plummer model initial conditions for test runs,
* scaled to units such that M = -4E = G = 1 (Henon, Hegge, etc).
* See Aarseth, SJ, Henon, M, & Wielen, R (1974) Astr & Ap, 37, 183.
*/
#define MFRAC 0.999 /* mass cut off at MFRAC of total */
void testdata()
{
real rsc, vsc, r, v, x, y;
vector cmr, cmv;
register bodyptr p;
long rejects = 0;
long halfnbody, i;
float offset;
register bodyptr cp;
headline = "Hack code: Plummer model";
Local[0].tnow = 0.0;
bodytab = (bodyptr) G_MALLOC(nbody * sizeof(body));
if (bodytab == NULL) {
error("testdata: not enough memory\n");
}
rsc = 9 * PI / 16;
vsc = sqrt(1.0 / rsc);
CLRV(cmr);
CLRV(cmv);
halfnbody = nbody / 2;
if (nbody % 2 != 0) halfnbody++;
for (p = bodytab; p < bodytab+halfnbody; p++) {
Type(p) = BODY;
Mass(p) = 1.0 / nbody;
Cost(p) = 1;
r = 1 / sqrt(pow(xrand(0.0, MFRAC), -2.0/3.0) - 1);
/* reject radii greater than 10 */
while (r > 9.0) {
rejects++;
r = 1 / sqrt(pow(xrand(0.0, MFRAC), -2.0/3.0) - 1);
}
pickshell(Pos(p), rsc * r);
ADDV(cmr, cmr, Pos(p));
do {
x = xrand(0.0, 1.0);
y = xrand(0.0, 0.1);
} while (y > x*x * pow(1 - x*x, 3.5));
v = sqrt(2.0) * x / pow(1 + r*r, 0.25);
pickshell(Vel(p), vsc * v);
ADDV(cmv, cmv, Vel(p));
}
offset = 4.0;
for (p = bodytab + halfnbody; p < bodytab+nbody; p++) {
Type(p) = BODY;
Mass(p) = 1.0 / nbody;
Cost(p) = 1;
cp = p - halfnbody;
for (i = 0; i < NDIM; i++){
Pos(p)[i] = Pos(cp)[i] + offset;
Vel(p)[i] = Vel(cp)[i];
}
ADDV(cmr, cmr, Pos(p));
ADDV(cmv, cmv, Vel(p));
}
DIVVS(cmr, cmr, (real) nbody);
DIVVS(cmv, cmv, (real) nbody);
for (p = bodytab; p < bodytab+nbody; p++) {
SUBV(Pos(p), Pos(p), cmr);
SUBV(Vel(p), Vel(p), cmv);
}
}
/*
* PICKSHELL: pick a random point on a sphere of specified radius.
*/
void pickshell(real vec[], real rad)
{
register long k;
double rsq, rsc;
do {
for (k = 0; k < NDIM; k++) {
vec[k] = xrand(-1.0, 1.0);
}
DOTVP(rsq, vec, vec);
} while (rsq > 1.0);
rsc = rad / sqrt(rsq);
MULVS(vec, vec, rsc);
}
long intpow(long i, long j)
{
long k;
long temp = 1;
for (k = 0; k < j; k++)
temp = temp*i;
return temp;
}
/*
* STEPSYSTEM: advance N-body system one time-step.
*/
void stepsystem(long ProcessId)
{
long i;
real Cavg;
bodyptr p,*pp;
vector dvel, vel1, dpos;
long trackstart, trackend;
long partitionstart, partitionend;
long treebuildstart, treebuildend;
long forcecalcstart, forcecalcend;
if (Local[ProcessId].nstep == 2) {
/* POSSIBLE ENHANCEMENT: Here is where one might reset the
statistics that one is measuring about the parallel execution */
}
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
CLOCK(trackstart);
}
if (ProcessId == 0) {
init_root();
}
else {
Local[ProcessId].mynumcell = 0;
Local[ProcessId].mynumleaf = 0;
}
/* start at same time */
BARRIER(Global->Barrier,NPROC);
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
CLOCK(treebuildstart);
}
/* load bodies into tree */
maketree(ProcessId);
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
CLOCK(treebuildend);
Global->treebuildtime += treebuildend - treebuildstart;
}
Housekeep(ProcessId);
Cavg = (real) Cost(Global->G_root) / (real)NPROC ;
Local[ProcessId].workMin = (long) (Cavg * ProcessId);
Local[ProcessId].workMax = (long) (Cavg * (ProcessId + 1)
+ (ProcessId == (NPROC - 1)));
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
CLOCK(partitionstart);
}
Local[ProcessId].mynbody = 0;
find_my_bodies(Global->G_root, 0, BRC_FUC, ProcessId );
/* B*RRIER(Global->Barcom,NPROC); */
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
CLOCK(partitionend);
Global->partitiontime += partitionend - partitionstart;
}
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
CLOCK(forcecalcstart);
}
ComputeForces(ProcessId);
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
CLOCK(forcecalcend);
Global->forcecalctime += forcecalcend - forcecalcstart;
}
/* advance my bodies */
for (pp = Local[ProcessId].mybodytab;
pp < Local[ProcessId].mybodytab+Local[ProcessId].mynbody; pp++) {
p = *pp;
MULVS(dvel, Acc(p), dthf);
ADDV(vel1, Vel(p), dvel);
MULVS(dpos, vel1, dtime);
ADDV(Pos(p), Pos(p), dpos);
ADDV(Vel(p), vel1, dvel);
for (i = 0; i < NDIM; i++) {
if (Pos(p)[i]<Local[ProcessId].min[i]) {
Local[ProcessId].min[i]=Pos(p)[i];
}
if (Pos(p)[i]>Local[ProcessId].max[i]) {
Local[ProcessId].max[i]=Pos(p)[i] ;
}
}
}
LOCK(Global->CountLock);
for (i = 0; i < NDIM; i++) {
if (Global->min[i] > Local[ProcessId].min[i]) {
Global->min[i] = Local[ProcessId].min[i];
}
if (Global->max[i] < Local[ProcessId].max[i]) {
Global->max[i] = Local[ProcessId].max[i];
}
}
UNLOCK(Global->CountLock);
/* bar needed to make sure that every process has computed its min */
/* and max coordinates, and has accumulated them into the global */
/* min and max, before the new dimensions are computed */
BARRIER(Global->Barrier,NPROC);
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
CLOCK(trackend);
Global->tracktime += trackend - trackstart;
}
if (ProcessId==0) {
Global->rsize=0;
SUBV(Global->max,Global->max,Global->min);
for (i = 0; i < NDIM; i++) {
if (Global->rsize < Global->max[i]) {
Global->rsize = Global->max[i];
}
}
ADDVS(Global->rmin,Global->min,-Global->rsize/100000.0);
Global->rsize = 1.00002*Global->rsize;
SETVS(Global->min,1E99);
SETVS(Global->max,-1E99);
}
Local[ProcessId].nstep++;
Local[ProcessId].tnow = Local[ProcessId].tnow + dtime;
}
void ComputeForces(long ProcessId)
{
bodyptr p,*pp;
vector acc1, dacc, dvel;
for (pp = Local[ProcessId].mybodytab;
pp < Local[ProcessId].mybodytab+Local[ProcessId].mynbody;pp++) {
p = *pp;
SETV(acc1, Acc(p));
Cost(p)=0;
hackgrav(p,ProcessId);
Local[ProcessId].myn2bcalc += Local[ProcessId].myn2bterm;
Local[ProcessId].mynbccalc += Local[ProcessId].mynbcterm;
if (!Local[ProcessId].skipself) { /* did we miss self-int? */
Local[ProcessId].myselfint++; /* count another goofup */
}
if (Local[ProcessId].nstep > 0) {
/* use change in accel to make 2nd order correction to vel */
SUBV(dacc, Acc(p), acc1);
MULVS(dvel, dacc, dthf);
ADDV(Vel(p), Vel(p), dvel);
}
}
}
/*
* FIND_MY_INITIAL_BODIES: puts into mybodytab the initial list of bodies
* assigned to the processor.
*/
void find_my_initial_bodies(bodyptr btab, long nbody, long ProcessId)
{
long extra,offset,i;
Local[ProcessId].mynbody = nbody / NPROC;
extra = nbody % NPROC;
if (ProcessId < extra) {
Local[ProcessId].mynbody++;
offset = Local[ProcessId].mynbody * ProcessId;
}
if (ProcessId >= extra) {
offset = (Local[ProcessId].mynbody+1) * extra + (ProcessId - extra)
* Local[ProcessId].mynbody;
}
for (i=0; i < Local[ProcessId].mynbody; i++) {
Local[ProcessId].mybodytab[i] = &(btab[offset+i]);
}
BARRIER(Global->Barrier,NPROC);
}
void find_my_bodies(nodeptr mycell, long work, long direction, long ProcessId)
{
long i;
leafptr l;
nodeptr qptr;
if (Type(mycell) == LEAF) {
l = (leafptr) mycell;
for (i = 0; i < l->num_bodies; i++) {
if (work >= Local[ProcessId].workMin - .1) {
if((Local[ProcessId].mynbody+2) > maxmybody) {
error("find_my_bodies: Processor %ld needs more than %ld bodies; increase fleaves\n", ProcessId, maxmybody);
}
Local[ProcessId].mybodytab[Local[ProcessId].mynbody++] =
Bodyp(l)[i];
}
work += Cost(Bodyp(l)[i]);
if (work >= Local[ProcessId].workMax-.1) {
break;
}
}
}
else {
for(i = 0; (i < NSUB) && (work < (Local[ProcessId].workMax - .1)); i++){
qptr = Subp(mycell)[Child_Sequence[direction][i]];
if (qptr!=NULL) {
if ((work+Cost(qptr)) >= (Local[ProcessId].workMin -.1)) {
find_my_bodies(qptr,work, Direction_Sequence[direction][i],
ProcessId);
}
work += Cost(qptr);
}
}
}
}
/*
* HOUSEKEEP: reinitialize the different variables (in particular global
* variables) between each time step.
*/
void Housekeep(long ProcessId)
{
Local[ProcessId].myn2bcalc = Local[ProcessId].mynbccalc
= Local[ProcessId].myselfint = 0;
SETVS(Local[ProcessId].min,1E99);
SETVS(Local[ProcessId].max,-1E99);
}
/*
* SETBOUND: Compute the initial size of the root of the tree; only done
* before first time step, and only processor 0 does it
*/
void setbound()
{
long i;
real side ;
bodyptr p;
SETVS(Local[0].min,1E99);
SETVS(Local[0].max,-1E99);
side=0;
for (p = bodytab; p < bodytab+nbody; p++) {
for (i=0; i<NDIM;i++) {
if (Pos(p)[i]<Local[0].min[i]) Local[0].min[i]=Pos(p)[i] ;
if (Pos(p)[i]>Local[0].max[i]) Local[0].max[i]=Pos(p)[i] ;
}
}
SUBV(Local[0].max,Local[0].max,Local[0].min);
for (i=0; i<NDIM;i++) if (side<Local[0].max[i]) side=Local[0].max[i];
ADDVS(Global->rmin,Local[0].min,-side/100000.0);
Global->rsize = 1.00002*side;
SETVS(Global->max,-1E99);
SETVS(Global->min,1E99);
}
void Help()
{
printf("There are a total of twelve parameters, and all of them have default values.\n");
printf("\n");
printf("1) infile (char*) : The name of an input file that contains particle data. \n");
printf(" The format of the file is:\n");
printf("\ta) An int representing the number of particles in the distribution\n");
printf("\tb) An int representing the dimensionality of the problem (3-D)\n");
printf("\tc) A double representing the current time of the simulation\n");
printf("\td) Doubles representing the masses of all the particles\n");
printf("\te) A vector (length equal to the dimensionality) of doubles\n");
printf("\t representing the positions of all the particles\n");
printf("\tf) A vector (length equal to the dimensionality) of doubles\n");
printf("\t representing the velocities of all the particles\n");
printf("\n");
printf(" Each of these numbers can be separated by any amount of whitespace.\n");
printf("\n");
printf("2) nbody (int) : If no input file is specified (the first line is blank), this\n");
printf(" number specifies the number of particles to generate under a plummer model.\n");
printf(" Default is 16384.\n");
printf("\n");
printf("3) seed (int) : The seed used by the random number generator.\n");
printf(" Default is 123.\n");
printf("\n");
printf("4) outfile (char*) : The name of the file that snapshots will be printed to. \n");
printf(" This feature has been disabled in the SPLASH release.\n");
printf(" Default is NULL.\n");
printf("\n");
printf("5) dtime (double) : The integration time-step.\n");
printf(" Default is 0.025.\n");
printf("\n");
printf("6) eps (double) : The usual potential softening\n");
printf(" Default is 0.05.\n");
printf("\n");
printf("7) tol (double) : The cell subdivision tolerance.\n");
printf(" Default is 1.0.\n");
printf("\n");
printf("8) fcells (double) : The total number of cells created is equal to \n");
printf(" fcells * number of leaves.\n");
printf(" Default is 2.0.\n");
printf("\n");
printf("9) fleaves (double) : The total number of leaves created is equal to \n");
printf(" fleaves * nbody.\n");
printf(" Default is 0.5.\n");
printf("\n");
printf("10) tstop (double) : The time to stop integration.\n");
printf(" Default is 0.075.\n");
printf("\n");
printf("11) dtout (double) : The data-output interval.\n");
printf(" Default is 0.25.\n");
printf("\n");
printf("12) NPROC (int) : The number of processors.\n");
printf(" Default is 1.\n");
}