Sanchayan Maity
0f4b39775c
During the last commit of splash2 benchmark it seems before committing when we ran "make clean", it effectively undid what the patch at below link did http://www.capsl.udel.edu/splash/Download.html Fix this since without this it is not possible to build the arcane splash2 benchmark.
922 lines
28 KiB
C
922 lines
28 KiB
C
/*************************************************************************/
|
|
/* */
|
|
/* Copyright (c) 1994 Stanford University */
|
|
/* */
|
|
/* All rights reserved. */
|
|
/* */
|
|
/* Permission is given to use, copy, and modify this software for any */
|
|
/* non-commercial purpose as long as this copyright notice is not */
|
|
/* removed. All other uses, including redistribution in whole or in */
|
|
/* part, are forbidden without prior written permission. */
|
|
/* */
|
|
/* This software is provided with absolutely no warranty and no */
|
|
/* support. */
|
|
/* */
|
|
/*************************************************************************/
|
|
/*
|
|
Usage: BARNES <options> < inputfile
|
|
|
|
Command line options:
|
|
|
|
-h : Print out input file description
|
|
|
|
Input parameters should be placed in a file and redirected through
|
|
standard input. There are a total of twelve parameters, and all of
|
|
them have default values.
|
|
|
|
1) infile (char*) : The name of an input file that contains particle
|
|
data.
|
|
|
|
The format of the file is:
|
|
a) An int representing the number of particles in the distribution
|
|
b) An int representing the dimensionality of the problem (3-D)
|
|
c) A double representing the current time of the simulation
|
|
d) Doubles representing the masses of all the particles
|
|
e) A vector (length equal to the dimensionality) of doubles
|
|
representing the positions of all the particles
|
|
f) A vector (length equal to the dimensionality) of doubles
|
|
representing the velocities of all the particles
|
|
|
|
Each of these numbers can be separated by any amount of whitespace.
|
|
2) nbody (int) : If no input file is specified (the first line is
|
|
blank), this number specifies the number of particles to generate
|
|
under a plummer model. Default is 16384.
|
|
3) seed (int) : The seed used by the random number generator.
|
|
Default is 123.
|
|
4) outfile (char*) : The name of the file that snapshots will be
|
|
printed to. This feature has been disabled in the SPLASH release.
|
|
Default is NULL.
|
|
5) dtime (double) : The integration time-step.
|
|
Default is 0.025.
|
|
6) eps (double) : The usual potential softening
|
|
Default is 0.05.
|
|
7) tol (double) : The cell subdivision tolerance.
|
|
Default is 1.0.
|
|
8) fcells (double) : Number of cells created = fcells * number of
|
|
leaves.
|
|
Default is 2.0.
|
|
9) fleaves (double) : Number of leaves created = fleaves * nbody.
|
|
Default is 0.5.
|
|
10) tstop (double) : The time to stop integration.
|
|
Default is 0.075.
|
|
11) dtout (double) : The data-output interval.
|
|
Default is 0.25.
|
|
12) NPROC (int) : The number of processors.
|
|
Default is 1.
|
|
*/
|
|
|
|
MAIN_ENV
|
|
|
|
#define global /* nada */
|
|
|
|
#include "stdinc.h"
|
|
|
|
string defv[] = { /* DEFAULT PARAMETER VALUES */
|
|
/* file names for input/output */
|
|
"in=", /* snapshot of initial conditions */
|
|
"out=", /* stream of output snapshots */
|
|
|
|
/* params, used if no input specified, to make a Plummer Model */
|
|
"nbody=16384", /* number of particles to generate */
|
|
"seed=123", /* random number generator seed */
|
|
|
|
/* params to control N-body integration */
|
|
"dtime=0.025", /* integration time-step */
|
|
"eps=0.05", /* usual potential softening */
|
|
"tol=1.0", /* cell subdivision tolerence */
|
|
"fcells=2.0", /* cell allocation parameter */
|
|
"fleaves=0.5", /* leaf allocation parameter */
|
|
|
|
"tstop=0.075", /* time to stop integration */
|
|
"dtout=0.25", /* data-output interval */
|
|
|
|
"NPROC=1", /* number of processors */
|
|
};
|
|
|
|
/* The more complicated 3D case */
|
|
#define NUM_DIRECTIONS 32
|
|
#define BRC_FUC 0
|
|
#define BRC_FRA 1
|
|
#define BRA_FDA 2
|
|
#define BRA_FRC 3
|
|
#define BLC_FDC 4
|
|
#define BLC_FLA 5
|
|
#define BLA_FUA 6
|
|
#define BLA_FLC 7
|
|
#define BUC_FUA 8
|
|
#define BUC_FLC 9
|
|
#define BUA_FUC 10
|
|
#define BUA_FRA 11
|
|
#define BDC_FDA 12
|
|
#define BDC_FRC 13
|
|
#define BDA_FDC 14
|
|
#define BDA_FLA 15
|
|
|
|
#define FRC_BUC 16
|
|
#define FRC_BRA 17
|
|
#define FRA_BDA 18
|
|
#define FRA_BRC 19
|
|
#define FLC_BDC 20
|
|
#define FLC_BLA 21
|
|
#define FLA_BUA 22
|
|
#define FLA_BLC 23
|
|
#define FUC_BUA 24
|
|
#define FUC_BLC 25
|
|
#define FUA_BUC 26
|
|
#define FUA_BRA 27
|
|
#define FDC_BDA 28
|
|
#define FDC_BRC 29
|
|
#define FDA_BDC 30
|
|
#define FDA_BLA 31
|
|
|
|
static long Child_Sequence[NUM_DIRECTIONS][NSUB] =
|
|
{
|
|
{ 2, 5, 6, 1, 0, 3, 4, 7}, /* BRC_FUC */
|
|
{ 2, 5, 6, 1, 0, 7, 4, 3}, /* BRC_FRA */
|
|
{ 1, 6, 5, 2, 3, 0, 7, 4}, /* BRA_FDA */
|
|
{ 1, 6, 5, 2, 3, 4, 7, 0}, /* BRA_FRC */
|
|
{ 6, 1, 2, 5, 4, 7, 0, 3}, /* BLC_FDC */
|
|
{ 6, 1, 2, 5, 4, 3, 0, 7}, /* BLC_FLA */
|
|
{ 5, 2, 1, 6, 7, 4, 3, 0}, /* BLA_FUA */
|
|
{ 5, 2, 1, 6, 7, 0, 3, 4}, /* BLA_FLC */
|
|
{ 1, 2, 5, 6, 7, 4, 3, 0}, /* BUC_FUA */
|
|
{ 1, 2, 5, 6, 7, 0, 3, 4}, /* BUC_FLC */
|
|
{ 6, 5, 2, 1, 0, 3, 4, 7}, /* BUA_FUC */
|
|
{ 6, 5, 2, 1, 0, 7, 4, 3}, /* BUA_FRA */
|
|
{ 5, 6, 1, 2, 3, 0, 7, 4}, /* BDC_FDA */
|
|
{ 5, 6, 1, 2, 3, 4, 7, 0}, /* BDC_FRC */
|
|
{ 2, 1, 6, 5, 4, 7, 0, 3}, /* BDA_FDC */
|
|
{ 2, 1, 6, 5, 4, 3, 0, 7}, /* BDA_FLA */
|
|
|
|
{ 3, 4, 7, 0, 1, 2, 5, 6}, /* FRC_BUC */
|
|
{ 3, 4, 7, 0, 1, 6, 5, 2}, /* FRC_BRA */
|
|
{ 0, 7, 4, 3, 2, 1, 6, 5}, /* FRA_BDA */
|
|
{ 0, 7, 4, 3, 2, 5, 6, 1}, /* FRA_BRC */
|
|
{ 7, 0, 3, 4, 5, 6, 1, 2}, /* FLC_BDC */
|
|
{ 7, 0, 3, 4, 5, 2, 1, 6}, /* FLC_BLA */
|
|
{ 4, 3, 0, 7, 6, 5, 2, 1}, /* FLA_BUA */
|
|
{ 4, 3, 0, 7, 6, 1, 2, 5}, /* FLA_BLC */
|
|
{ 0, 3, 4, 7, 6, 5, 2, 1}, /* FUC_BUA */
|
|
{ 0, 3, 4, 7, 6, 1, 2, 5}, /* FUC_BLC */
|
|
{ 7, 4, 3, 0, 1, 2, 5, 6}, /* FUA_BUC */
|
|
{ 7, 4, 3, 0, 1, 6, 5, 2}, /* FUA_BRA */
|
|
{ 4, 7, 0, 3, 2, 1, 6, 5}, /* FDC_BDA */
|
|
{ 4, 7, 0, 3, 2, 5, 6, 1}, /* FDC_BRC */
|
|
{ 3, 0, 7, 4, 5, 6, 1, 2}, /* FDA_BDC */
|
|
{ 3, 0, 7, 4, 5, 2, 1, 6}, /* FDA_BLA */
|
|
};
|
|
|
|
static long Direction_Sequence[NUM_DIRECTIONS][NSUB] =
|
|
{
|
|
{ FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
|
|
/* BRC_FUC */
|
|
{ FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA, BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC },
|
|
/* BRC_FRA */
|
|
{ FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC, BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC },
|
|
/* BRA_FDA */
|
|
{ FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
|
|
/* BRA_FRC */
|
|
{ FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA, BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA },
|
|
/* BLC_FDC */
|
|
{ FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA, BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC },
|
|
/* BLC_FLA */
|
|
{ FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC, BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC },
|
|
/* BLA_FUA */
|
|
{ FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC, BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA },
|
|
/* BLA_FLC */
|
|
{ FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA, BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC },
|
|
/* BUC_FUA */
|
|
{ FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA, BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA },
|
|
/* BUC_FLC */
|
|
{ FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
|
|
/* BUA_FUC */
|
|
{ FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC, BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC },
|
|
/* BUA_FRA */
|
|
{ FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA, BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC },
|
|
/* BDC_FDA */
|
|
{ FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
|
|
/* BDC_FRC */
|
|
{ FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC, BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA },
|
|
/* BDA_FDC */
|
|
{ FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC, BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC },
|
|
/* BDA_FLA */
|
|
|
|
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA },
|
|
/* FRC_BUC */
|
|
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC },
|
|
/* FRC_BRA */
|
|
{ BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC, FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC },
|
|
/* FRA_BDA */
|
|
{ BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC, FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA },
|
|
/* FRA_BRC */
|
|
{ BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA, FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA },
|
|
/* FLC_BDC */
|
|
{ BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA, FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC },
|
|
/* FLC_BLA */
|
|
{ BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC, FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC },
|
|
/* FLA_BUA */
|
|
{ BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC, FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA },
|
|
/* FLA_BLC */
|
|
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC },
|
|
/* FUC_BUA */
|
|
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA },
|
|
/* FUC_BLC */
|
|
{ BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC, FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA },
|
|
/* FUA_BUC */
|
|
{ BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC, FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC },
|
|
/* FUA_BRA */
|
|
{ BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA, FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC },
|
|
/* FDC_BDA */
|
|
{ BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA, FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA },
|
|
/* FDC_BRC */
|
|
{ BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC, FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA },
|
|
/* FDA_BDC */
|
|
{ BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC, FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC },
|
|
/* FDA_BLA */
|
|
};
|
|
|
|
int main (int argc, string argv[])
|
|
{
|
|
long c;
|
|
|
|
while ((c = getopt(argc, argv, "h")) != -1) {
|
|
switch(c) {
|
|
case 'h':
|
|
Help();
|
|
exit(-1);
|
|
break;
|
|
default:
|
|
fprintf(stderr, "Only valid option is \"-h\".\n");
|
|
exit(-1);
|
|
break;
|
|
}
|
|
}
|
|
|
|
Global = NULL;
|
|
initparam(defv);
|
|
startrun();
|
|
initoutput();
|
|
tab_init();
|
|
|
|
Global->tracktime = 0;
|
|
Global->partitiontime = 0;
|
|
Global->treebuildtime = 0;
|
|
Global->forcecalctime = 0;
|
|
Global->current_id = 0;
|
|
|
|
CLOCK(Global->computestart);
|
|
|
|
printf("COMPUTESTART = %12lu\n",Global->computestart);
|
|
|
|
CREATE(SlaveStart, NPROC);
|
|
|
|
WAIT_FOR_END(NPROC);
|
|
|
|
CLOCK(Global->computeend);
|
|
|
|
printf("COMPUTEEND = %12lu\n",Global->computeend);
|
|
printf("COMPUTETIME = %12lu\n",Global->computeend - Global->computestart);
|
|
printf("TRACKTIME = %12lu\n",Global->tracktime);
|
|
printf("PARTITIONTIME = %12lu\t%5.2f\n",Global->partitiontime,
|
|
((float)Global->partitiontime)/Global->tracktime);
|
|
printf("TREEBUILDTIME = %12lu\t%5.2f\n",Global->treebuildtime,
|
|
((float)Global->treebuildtime)/Global->tracktime);
|
|
printf("FORCECALCTIME = %12lu\t%5.2f\n",Global->forcecalctime,
|
|
((float)Global->forcecalctime)/Global->tracktime);
|
|
printf("RESTTIME = %12lu\t%5.2f\n",
|
|
Global->tracktime - Global->partitiontime -
|
|
Global->treebuildtime - Global->forcecalctime,
|
|
((float)(Global->tracktime-Global->partitiontime-
|
|
Global->treebuildtime-Global->forcecalctime))/
|
|
Global->tracktime);
|
|
MAIN_END;
|
|
}
|
|
|
|
/*
|
|
* ANLINIT : initialize ANL macros
|
|
*/
|
|
void ANLinit()
|
|
{
|
|
MAIN_INITENV(,70000000,);
|
|
/* Allocate global, shared memory */
|
|
|
|
Global = (struct GlobalMemory *) G_MALLOC(sizeof(struct GlobalMemory));
|
|
if (Global==NULL) error("No initialization for Global\n");
|
|
|
|
BARINIT(Global->Barrier, NPROC);
|
|
|
|
LOCKINIT(Global->CountLock);
|
|
LOCKINIT(Global->io_lock);
|
|
}
|
|
|
|
/*
|
|
* INIT_ROOT: Processor 0 reinitialize the global root at each time step
|
|
*/
|
|
void init_root()
|
|
{
|
|
long i;
|
|
|
|
Global->G_root=Local[0].ctab;
|
|
Global->G_root->seqnum = 0;
|
|
Type(Global->G_root) = CELL;
|
|
Done(Global->G_root) = FALSE;
|
|
Level(Global->G_root) = IMAX >> 1;
|
|
for (i = 0; i < NSUB; i++) {
|
|
Subp(Global->G_root)[i] = NULL;
|
|
}
|
|
Local[0].mynumcell=1;
|
|
}
|
|
|
|
long Log_base_2(long number)
|
|
{
|
|
long cumulative;
|
|
long out;
|
|
|
|
cumulative = 1;
|
|
for (out = 0; out < 20; out++) {
|
|
if (cumulative == number) {
|
|
return(out);
|
|
}
|
|
else {
|
|
cumulative = cumulative * 2;
|
|
}
|
|
}
|
|
|
|
fprintf(stderr,"Log_base_2: couldn't find log2 of %ld\n", number);
|
|
exit(-1);
|
|
}
|
|
|
|
/*
|
|
* TAB_INIT : allocate body and cell data space
|
|
*/
|
|
|
|
void tab_init()
|
|
{
|
|
long i;
|
|
|
|
/*allocate leaf/cell space */
|
|
maxleaf = (long) ((double) fleaves * nbody);
|
|
maxcell = fcells * maxleaf;
|
|
for (i = 0; i < NPROC; ++i) {
|
|
Local[i].ctab = (cellptr) G_MALLOC((maxcell / NPROC) * sizeof(cell));
|
|
Local[i].ltab = (leafptr) G_MALLOC((maxleaf / NPROC) * sizeof(leaf));
|
|
}
|
|
|
|
/*allocate space for personal lists of body pointers */
|
|
maxmybody = (nbody+maxleaf*MAX_BODIES_PER_LEAF)/NPROC;
|
|
Local[0].mybodytab = (bodyptr*) G_MALLOC(NPROC*maxmybody*sizeof(bodyptr));
|
|
/* space is allocated so that every */
|
|
/* process can have a maximum of maxmybody pointers to bodies */
|
|
/* then there is an array of bodies called bodytab which is */
|
|
/* allocated in the distribution generation or when the distr. */
|
|
/* file is read */
|
|
maxmycell = maxcell / NPROC;
|
|
maxmyleaf = maxleaf / NPROC;
|
|
Local[0].mycelltab = (cellptr*) G_MALLOC(NPROC*maxmycell*sizeof(cellptr));
|
|
Local[0].myleaftab = (leafptr*) G_MALLOC(NPROC*maxmyleaf*sizeof(leafptr));
|
|
|
|
CellLock = (struct CellLockType *) G_MALLOC(sizeof(struct CellLockType));
|
|
ALOCKINIT(CellLock->CL,MAXLOCK);
|
|
}
|
|
|
|
/*
|
|
* SLAVESTART: main task for each processor
|
|
*/
|
|
void SlaveStart()
|
|
{
|
|
long ProcessId;
|
|
|
|
/* Get unique ProcessId */
|
|
LOCK(Global->CountLock);
|
|
ProcessId = Global->current_id++;
|
|
UNLOCK(Global->CountLock);
|
|
|
|
BARINCLUDE(Global->Barrier);
|
|
|
|
/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
|
|
processors to avoid migration */
|
|
|
|
/* initialize mybodytabs */
|
|
Local[ProcessId].mybodytab = Local[0].mybodytab + (maxmybody * ProcessId);
|
|
/* note that every process has its own copy */
|
|
/* of mybodytab, which was initialized to the */
|
|
/* beginning of the whole array by proc. 0 */
|
|
/* before create */
|
|
Local[ProcessId].mycelltab = Local[0].mycelltab + (maxmycell * ProcessId);
|
|
Local[ProcessId].myleaftab = Local[0].myleaftab + (maxmyleaf * ProcessId);
|
|
/* POSSIBLE ENHANCEMENT: Here is where one might distribute the
|
|
data across physically distributed memories as desired.
|
|
|
|
One way to do this is as follows:
|
|
|
|
long i;
|
|
|
|
if (ProcessId == 0) {
|
|
for (i=0;i<NPROC;i++) {
|
|
Place all addresses x such that
|
|
&(Local[i]) <= x < &(Local[i])+
|
|
sizeof(struct local_memory) on node i
|
|
Place all addresses x such that
|
|
&(Local[i].mybodytab) <= x < &(Local[i].mybodytab)+
|
|
maxmybody * sizeof(bodyptr) - 1 on node i
|
|
Place all addresses x such that
|
|
&(Local[i].mycelltab) <= x < &(Local[i].mycelltab)+
|
|
maxmycell * sizeof(cellptr) - 1 on node i
|
|
Place all addresses x such that
|
|
&(Local[i].myleaftab) <= x < &(Local[i].myleaftab)+
|
|
maxmyleaf * sizeof(leafptr) - 1 on node i
|
|
}
|
|
}
|
|
|
|
barrier(Global->Barstart,NPROC);
|
|
|
|
*/
|
|
|
|
Local[ProcessId].tout = Local[0].tout;
|
|
Local[ProcessId].tnow = Local[0].tnow;
|
|
Local[ProcessId].nstep = Local[0].nstep;
|
|
|
|
find_my_initial_bodies(bodytab, nbody, ProcessId);
|
|
|
|
/* main loop */
|
|
while (Local[ProcessId].tnow < tstop + 0.1 * dtime) {
|
|
stepsystem(ProcessId);
|
|
// printtree(Global->G_root);
|
|
// printf("Going to next step!!!\n");
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* STARTRUN: startup hierarchical N-body code.
|
|
*/
|
|
|
|
void startrun()
|
|
{
|
|
long seed;
|
|
|
|
infile = getparam("in");
|
|
if (*infile != '\0'/*NULL*/) {
|
|
inputdata();
|
|
}
|
|
else {
|
|
nbody = getiparam("nbody");
|
|
if (nbody < 1) {
|
|
error("startrun: absurd nbody\n");
|
|
}
|
|
seed = getiparam("seed");
|
|
}
|
|
|
|
outfile = getparam("out");
|
|
dtime = getdparam("dtime");
|
|
dthf = 0.5 * dtime;
|
|
eps = getdparam("eps");
|
|
epssq = eps*eps;
|
|
tol = getdparam("tol");
|
|
tolsq = tol*tol;
|
|
fcells = getdparam("fcells");
|
|
fleaves = getdparam("fleaves");
|
|
tstop = getdparam("tstop");
|
|
dtout = getdparam("dtout");
|
|
NPROC = getiparam("NPROC");
|
|
Local[0].nstep = 0;
|
|
pranset(seed);
|
|
testdata();
|
|
ANLinit();
|
|
setbound();
|
|
Local[0].tout = Local[0].tnow + dtout;
|
|
}
|
|
|
|
/*
|
|
* TESTDATA: generate Plummer model initial conditions for test runs,
|
|
* scaled to units such that M = -4E = G = 1 (Henon, Hegge, etc).
|
|
* See Aarseth, SJ, Henon, M, & Wielen, R (1974) Astr & Ap, 37, 183.
|
|
*/
|
|
|
|
#define MFRAC 0.999 /* mass cut off at MFRAC of total */
|
|
|
|
void testdata()
|
|
{
|
|
real rsc, vsc, r, v, x, y;
|
|
vector cmr, cmv;
|
|
register bodyptr p;
|
|
long rejects = 0;
|
|
long halfnbody, i;
|
|
float offset;
|
|
register bodyptr cp;
|
|
|
|
headline = "Hack code: Plummer model";
|
|
Local[0].tnow = 0.0;
|
|
bodytab = (bodyptr) G_MALLOC(nbody * sizeof(body));
|
|
if (bodytab == NULL) {
|
|
error("testdata: not enough memory\n");
|
|
}
|
|
rsc = 9 * PI / 16;
|
|
vsc = sqrt(1.0 / rsc);
|
|
|
|
CLRV(cmr);
|
|
CLRV(cmv);
|
|
|
|
halfnbody = nbody / 2;
|
|
if (nbody % 2 != 0) halfnbody++;
|
|
for (p = bodytab; p < bodytab+halfnbody; p++) {
|
|
Type(p) = BODY;
|
|
Mass(p) = 1.0 / nbody;
|
|
Cost(p) = 1;
|
|
|
|
r = 1 / sqrt(pow(xrand(0.0, MFRAC), -2.0/3.0) - 1);
|
|
/* reject radii greater than 10 */
|
|
while (r > 9.0) {
|
|
rejects++;
|
|
r = 1 / sqrt(pow(xrand(0.0, MFRAC), -2.0/3.0) - 1);
|
|
}
|
|
pickshell(Pos(p), rsc * r);
|
|
ADDV(cmr, cmr, Pos(p));
|
|
do {
|
|
x = xrand(0.0, 1.0);
|
|
y = xrand(0.0, 0.1);
|
|
|
|
} while (y > x*x * pow(1 - x*x, 3.5));
|
|
|
|
v = sqrt(2.0) * x / pow(1 + r*r, 0.25);
|
|
pickshell(Vel(p), vsc * v);
|
|
ADDV(cmv, cmv, Vel(p));
|
|
}
|
|
|
|
offset = 4.0;
|
|
|
|
for (p = bodytab + halfnbody; p < bodytab+nbody; p++) {
|
|
Type(p) = BODY;
|
|
Mass(p) = 1.0 / nbody;
|
|
Cost(p) = 1;
|
|
|
|
cp = p - halfnbody;
|
|
for (i = 0; i < NDIM; i++){
|
|
Pos(p)[i] = Pos(cp)[i] + offset;
|
|
Vel(p)[i] = Vel(cp)[i];
|
|
}
|
|
ADDV(cmr, cmr, Pos(p));
|
|
ADDV(cmv, cmv, Vel(p));
|
|
}
|
|
|
|
DIVVS(cmr, cmr, (real) nbody);
|
|
DIVVS(cmv, cmv, (real) nbody);
|
|
|
|
for (p = bodytab; p < bodytab+nbody; p++) {
|
|
SUBV(Pos(p), Pos(p), cmr);
|
|
SUBV(Vel(p), Vel(p), cmv);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* PICKSHELL: pick a random point on a sphere of specified radius.
|
|
*/
|
|
|
|
void pickshell(real vec[], real rad)
|
|
{
|
|
register long k;
|
|
double rsq, rsc;
|
|
|
|
do {
|
|
for (k = 0; k < NDIM; k++) {
|
|
vec[k] = xrand(-1.0, 1.0);
|
|
}
|
|
DOTVP(rsq, vec, vec);
|
|
} while (rsq > 1.0);
|
|
|
|
rsc = rad / sqrt(rsq);
|
|
MULVS(vec, vec, rsc);
|
|
}
|
|
|
|
|
|
|
|
long intpow(long i, long j)
|
|
{
|
|
long k;
|
|
long temp = 1;
|
|
|
|
for (k = 0; k < j; k++)
|
|
temp = temp*i;
|
|
return temp;
|
|
}
|
|
|
|
|
|
/*
|
|
* STEPSYSTEM: advance N-body system one time-step.
|
|
*/
|
|
|
|
void stepsystem(long ProcessId)
|
|
{
|
|
long i;
|
|
real Cavg;
|
|
bodyptr p,*pp;
|
|
vector dvel, vel1, dpos;
|
|
long trackstart, trackend;
|
|
long partitionstart, partitionend;
|
|
long treebuildstart, treebuildend;
|
|
long forcecalcstart, forcecalcend;
|
|
|
|
if (Local[ProcessId].nstep == 2) {
|
|
/* POSSIBLE ENHANCEMENT: Here is where one might reset the
|
|
statistics that one is measuring about the parallel execution */
|
|
}
|
|
|
|
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
CLOCK(trackstart);
|
|
}
|
|
|
|
if (ProcessId == 0) {
|
|
init_root();
|
|
}
|
|
else {
|
|
Local[ProcessId].mynumcell = 0;
|
|
Local[ProcessId].mynumleaf = 0;
|
|
}
|
|
|
|
|
|
/* start at same time */
|
|
BARRIER(Global->Barrier,NPROC);
|
|
|
|
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
CLOCK(treebuildstart);
|
|
}
|
|
|
|
/* load bodies into tree */
|
|
maketree(ProcessId);
|
|
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
CLOCK(treebuildend);
|
|
Global->treebuildtime += treebuildend - treebuildstart;
|
|
}
|
|
|
|
Housekeep(ProcessId);
|
|
|
|
Cavg = (real) Cost(Global->G_root) / (real)NPROC ;
|
|
Local[ProcessId].workMin = (long) (Cavg * ProcessId);
|
|
Local[ProcessId].workMax = (long) (Cavg * (ProcessId + 1)
|
|
+ (ProcessId == (NPROC - 1)));
|
|
|
|
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
CLOCK(partitionstart);
|
|
}
|
|
|
|
Local[ProcessId].mynbody = 0;
|
|
find_my_bodies(Global->G_root, 0, BRC_FUC, ProcessId );
|
|
|
|
/* B*RRIER(Global->Barcom,NPROC); */
|
|
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
CLOCK(partitionend);
|
|
Global->partitiontime += partitionend - partitionstart;
|
|
}
|
|
|
|
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
CLOCK(forcecalcstart);
|
|
}
|
|
|
|
ComputeForces(ProcessId);
|
|
|
|
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
CLOCK(forcecalcend);
|
|
Global->forcecalctime += forcecalcend - forcecalcstart;
|
|
}
|
|
|
|
/* advance my bodies */
|
|
for (pp = Local[ProcessId].mybodytab;
|
|
pp < Local[ProcessId].mybodytab+Local[ProcessId].mynbody; pp++) {
|
|
p = *pp;
|
|
MULVS(dvel, Acc(p), dthf);
|
|
ADDV(vel1, Vel(p), dvel);
|
|
MULVS(dpos, vel1, dtime);
|
|
ADDV(Pos(p), Pos(p), dpos);
|
|
ADDV(Vel(p), vel1, dvel);
|
|
|
|
for (i = 0; i < NDIM; i++) {
|
|
if (Pos(p)[i]<Local[ProcessId].min[i]) {
|
|
Local[ProcessId].min[i]=Pos(p)[i];
|
|
}
|
|
if (Pos(p)[i]>Local[ProcessId].max[i]) {
|
|
Local[ProcessId].max[i]=Pos(p)[i] ;
|
|
}
|
|
}
|
|
}
|
|
LOCK(Global->CountLock);
|
|
for (i = 0; i < NDIM; i++) {
|
|
if (Global->min[i] > Local[ProcessId].min[i]) {
|
|
Global->min[i] = Local[ProcessId].min[i];
|
|
}
|
|
if (Global->max[i] < Local[ProcessId].max[i]) {
|
|
Global->max[i] = Local[ProcessId].max[i];
|
|
}
|
|
}
|
|
UNLOCK(Global->CountLock);
|
|
|
|
/* bar needed to make sure that every process has computed its min */
|
|
/* and max coordinates, and has accumulated them into the global */
|
|
/* min and max, before the new dimensions are computed */
|
|
BARRIER(Global->Barrier,NPROC);
|
|
|
|
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
CLOCK(trackend);
|
|
Global->tracktime += trackend - trackstart;
|
|
}
|
|
if (ProcessId==0) {
|
|
Global->rsize=0;
|
|
SUBV(Global->max,Global->max,Global->min);
|
|
for (i = 0; i < NDIM; i++) {
|
|
if (Global->rsize < Global->max[i]) {
|
|
Global->rsize = Global->max[i];
|
|
}
|
|
}
|
|
ADDVS(Global->rmin,Global->min,-Global->rsize/100000.0);
|
|
Global->rsize = 1.00002*Global->rsize;
|
|
SETVS(Global->min,1E99);
|
|
SETVS(Global->max,-1E99);
|
|
}
|
|
Local[ProcessId].nstep++;
|
|
Local[ProcessId].tnow = Local[ProcessId].tnow + dtime;
|
|
}
|
|
|
|
|
|
|
|
void ComputeForces(long ProcessId)
|
|
{
|
|
bodyptr p,*pp;
|
|
vector acc1, dacc, dvel;
|
|
|
|
for (pp = Local[ProcessId].mybodytab;
|
|
pp < Local[ProcessId].mybodytab+Local[ProcessId].mynbody;pp++) {
|
|
p = *pp;
|
|
SETV(acc1, Acc(p));
|
|
Cost(p)=0;
|
|
hackgrav(p,ProcessId);
|
|
Local[ProcessId].myn2bcalc += Local[ProcessId].myn2bterm;
|
|
Local[ProcessId].mynbccalc += Local[ProcessId].mynbcterm;
|
|
if (!Local[ProcessId].skipself) { /* did we miss self-int? */
|
|
Local[ProcessId].myselfint++; /* count another goofup */
|
|
}
|
|
if (Local[ProcessId].nstep > 0) {
|
|
/* use change in accel to make 2nd order correction to vel */
|
|
SUBV(dacc, Acc(p), acc1);
|
|
MULVS(dvel, dacc, dthf);
|
|
ADDV(Vel(p), Vel(p), dvel);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* FIND_MY_INITIAL_BODIES: puts into mybodytab the initial list of bodies
|
|
* assigned to the processor.
|
|
*/
|
|
|
|
void find_my_initial_bodies(bodyptr btab, long nbody, long ProcessId)
|
|
{
|
|
long extra,offset,i;
|
|
|
|
Local[ProcessId].mynbody = nbody / NPROC;
|
|
extra = nbody % NPROC;
|
|
if (ProcessId < extra) {
|
|
Local[ProcessId].mynbody++;
|
|
offset = Local[ProcessId].mynbody * ProcessId;
|
|
}
|
|
if (ProcessId >= extra) {
|
|
offset = (Local[ProcessId].mynbody+1) * extra + (ProcessId - extra)
|
|
* Local[ProcessId].mynbody;
|
|
}
|
|
for (i=0; i < Local[ProcessId].mynbody; i++) {
|
|
Local[ProcessId].mybodytab[i] = &(btab[offset+i]);
|
|
}
|
|
BARRIER(Global->Barrier,NPROC);
|
|
}
|
|
|
|
|
|
void find_my_bodies(nodeptr mycell, long work, long direction, long ProcessId)
|
|
{
|
|
long i;
|
|
leafptr l;
|
|
nodeptr qptr;
|
|
|
|
if (Type(mycell) == LEAF) {
|
|
l = (leafptr) mycell;
|
|
for (i = 0; i < l->num_bodies; i++) {
|
|
if (work >= Local[ProcessId].workMin - .1) {
|
|
if((Local[ProcessId].mynbody+2) > maxmybody) {
|
|
error("find_my_bodies: Processor %ld needs more than %ld bodies; increase fleaves\n", ProcessId, maxmybody);
|
|
}
|
|
Local[ProcessId].mybodytab[Local[ProcessId].mynbody++] =
|
|
Bodyp(l)[i];
|
|
}
|
|
work += Cost(Bodyp(l)[i]);
|
|
if (work >= Local[ProcessId].workMax-.1) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
for(i = 0; (i < NSUB) && (work < (Local[ProcessId].workMax - .1)); i++){
|
|
qptr = Subp(mycell)[Child_Sequence[direction][i]];
|
|
if (qptr!=NULL) {
|
|
if ((work+Cost(qptr)) >= (Local[ProcessId].workMin -.1)) {
|
|
find_my_bodies(qptr,work, Direction_Sequence[direction][i],
|
|
ProcessId);
|
|
}
|
|
work += Cost(qptr);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* HOUSEKEEP: reinitialize the different variables (in particular global
|
|
* variables) between each time step.
|
|
*/
|
|
|
|
void Housekeep(long ProcessId)
|
|
{
|
|
Local[ProcessId].myn2bcalc = Local[ProcessId].mynbccalc
|
|
= Local[ProcessId].myselfint = 0;
|
|
SETVS(Local[ProcessId].min,1E99);
|
|
SETVS(Local[ProcessId].max,-1E99);
|
|
}
|
|
|
|
/*
|
|
* SETBOUND: Compute the initial size of the root of the tree; only done
|
|
* before first time step, and only processor 0 does it
|
|
*/
|
|
void setbound()
|
|
{
|
|
long i;
|
|
real side ;
|
|
bodyptr p;
|
|
|
|
SETVS(Local[0].min,1E99);
|
|
SETVS(Local[0].max,-1E99);
|
|
side=0;
|
|
|
|
for (p = bodytab; p < bodytab+nbody; p++) {
|
|
for (i=0; i<NDIM;i++) {
|
|
if (Pos(p)[i]<Local[0].min[i]) Local[0].min[i]=Pos(p)[i] ;
|
|
if (Pos(p)[i]>Local[0].max[i]) Local[0].max[i]=Pos(p)[i] ;
|
|
}
|
|
}
|
|
|
|
SUBV(Local[0].max,Local[0].max,Local[0].min);
|
|
for (i=0; i<NDIM;i++) if (side<Local[0].max[i]) side=Local[0].max[i];
|
|
ADDVS(Global->rmin,Local[0].min,-side/100000.0);
|
|
Global->rsize = 1.00002*side;
|
|
SETVS(Global->max,-1E99);
|
|
SETVS(Global->min,1E99);
|
|
}
|
|
|
|
void Help()
|
|
{
|
|
printf("There are a total of twelve parameters, and all of them have default values.\n");
|
|
printf("\n");
|
|
printf("1) infile (char*) : The name of an input file that contains particle data. \n");
|
|
printf(" The format of the file is:\n");
|
|
printf("\ta) An int representing the number of particles in the distribution\n");
|
|
printf("\tb) An int representing the dimensionality of the problem (3-D)\n");
|
|
printf("\tc) A double representing the current time of the simulation\n");
|
|
printf("\td) Doubles representing the masses of all the particles\n");
|
|
printf("\te) A vector (length equal to the dimensionality) of doubles\n");
|
|
printf("\t representing the positions of all the particles\n");
|
|
printf("\tf) A vector (length equal to the dimensionality) of doubles\n");
|
|
printf("\t representing the velocities of all the particles\n");
|
|
printf("\n");
|
|
printf(" Each of these numbers can be separated by any amount of whitespace.\n");
|
|
printf("\n");
|
|
printf("2) nbody (int) : If no input file is specified (the first line is blank), this\n");
|
|
printf(" number specifies the number of particles to generate under a plummer model.\n");
|
|
printf(" Default is 16384.\n");
|
|
printf("\n");
|
|
printf("3) seed (int) : The seed used by the random number generator.\n");
|
|
printf(" Default is 123.\n");
|
|
printf("\n");
|
|
printf("4) outfile (char*) : The name of the file that snapshots will be printed to. \n");
|
|
printf(" This feature has been disabled in the SPLASH release.\n");
|
|
printf(" Default is NULL.\n");
|
|
printf("\n");
|
|
printf("5) dtime (double) : The integration time-step.\n");
|
|
printf(" Default is 0.025.\n");
|
|
printf("\n");
|
|
printf("6) eps (double) : The usual potential softening\n");
|
|
printf(" Default is 0.05.\n");
|
|
printf("\n");
|
|
printf("7) tol (double) : The cell subdivision tolerance.\n");
|
|
printf(" Default is 1.0.\n");
|
|
printf("\n");
|
|
printf("8) fcells (double) : The total number of cells created is equal to \n");
|
|
printf(" fcells * number of leaves.\n");
|
|
printf(" Default is 2.0.\n");
|
|
printf("\n");
|
|
printf("9) fleaves (double) : The total number of leaves created is equal to \n");
|
|
printf(" fleaves * nbody.\n");
|
|
printf(" Default is 0.5.\n");
|
|
printf("\n");
|
|
printf("10) tstop (double) : The time to stop integration.\n");
|
|
printf(" Default is 0.075.\n");
|
|
printf("\n");
|
|
printf("11) dtout (double) : The data-output interval.\n");
|
|
printf(" Default is 0.25.\n");
|
|
printf("\n");
|
|
printf("12) NPROC (int) : The number of processors.\n");
|
|
printf(" Default is 1.\n");
|
|
}
|