/*************************************************************************/ /* */ /* Copyright (c) 1994 Stanford University */ /* */ /* All rights reserved. */ /* */ /* Permission is given to use, copy, and modify this software for any */ /* non-commercial purpose as long as this copyright notice is not */ /* removed. All other uses, including redistribution in whole or in */ /* part, are forbidden without prior written permission. */ /* */ /* This software is provided with absolutely no warranty and no */ /* support. */ /* */ /*************************************************************************/ /* Usage: BARNES < inputfile Command line options: -h : Print out input file description Input parameters should be placed in a file and redirected through standard input. There are a total of twelve parameters, and all of them have default values. 1) infile (char*) : The name of an input file that contains particle data. The format of the file is: a) An int representing the number of particles in the distribution b) An int representing the dimensionality of the problem (3-D) c) A double representing the current time of the simulation d) Doubles representing the masses of all the particles e) A vector (length equal to the dimensionality) of doubles representing the positions of all the particles f) A vector (length equal to the dimensionality) of doubles representing the velocities of all the particles Each of these numbers can be separated by any amount of whitespace. 2) nbody (int) : If no input file is specified (the first line is blank), this number specifies the number of particles to generate under a plummer model. Default is 16384. 3) seed (int) : The seed used by the random number generator. Default is 123. 4) outfile (char*) : The name of the file that snapshots will be printed to. This feature has been disabled in the SPLASH release. Default is NULL. 5) dtime (double) : The integration time-step. Default is 0.025. 6) eps (double) : The usual potential softening Default is 0.05. 7) tol (double) : The cell subdivision tolerance. Default is 1.0. 8) fcells (double) : Number of cells created = fcells * number of leaves. Default is 2.0. 9) fleaves (double) : Number of leaves created = fleaves * nbody. Default is 0.5. 10) tstop (double) : The time to stop integration. Default is 0.075. 11) dtout (double) : The data-output interval. Default is 0.25. 12) NPROC (int) : The number of processors. Default is 1. */ MAIN_ENV #define global /* nada */ #include "stdinc.h" string defv[] = { /* DEFAULT PARAMETER VALUES */ /* file names for input/output */ "in=", /* snapshot of initial conditions */ "out=", /* stream of output snapshots */ /* params, used if no input specified, to make a Plummer Model */ "nbody=16384", /* number of particles to generate */ "seed=123", /* random number generator seed */ /* params to control N-body integration */ "dtime=0.025", /* integration time-step */ "eps=0.05", /* usual potential softening */ "tol=1.0", /* cell subdivision tolerence */ "fcells=2.0", /* cell allocation parameter */ "fleaves=0.5", /* leaf allocation parameter */ "tstop=0.075", /* time to stop integration */ "dtout=0.25", /* data-output interval */ "NPROC=1", /* number of processors */ }; /* The more complicated 3D case */ #define NUM_DIRECTIONS 32 #define BRC_FUC 0 #define BRC_FRA 1 #define BRA_FDA 2 #define BRA_FRC 3 #define BLC_FDC 4 #define BLC_FLA 5 #define BLA_FUA 6 #define BLA_FLC 7 #define BUC_FUA 8 #define BUC_FLC 9 #define BUA_FUC 10 #define BUA_FRA 11 #define BDC_FDA 12 #define BDC_FRC 13 #define BDA_FDC 14 #define BDA_FLA 15 #define FRC_BUC 16 #define FRC_BRA 17 #define FRA_BDA 18 #define FRA_BRC 19 #define FLC_BDC 20 #define FLC_BLA 21 #define FLA_BUA 22 #define FLA_BLC 23 #define FUC_BUA 24 #define FUC_BLC 25 #define FUA_BUC 26 #define FUA_BRA 27 #define FDC_BDA 28 #define FDC_BRC 29 #define FDA_BDC 30 #define FDA_BLA 31 static long Child_Sequence[NUM_DIRECTIONS][NSUB] = { { 2, 5, 6, 1, 0, 3, 4, 7}, /* BRC_FUC */ { 2, 5, 6, 1, 0, 7, 4, 3}, /* BRC_FRA */ { 1, 6, 5, 2, 3, 0, 7, 4}, /* BRA_FDA */ { 1, 6, 5, 2, 3, 4, 7, 0}, /* BRA_FRC */ { 6, 1, 2, 5, 4, 7, 0, 3}, /* BLC_FDC */ { 6, 1, 2, 5, 4, 3, 0, 7}, /* BLC_FLA */ { 5, 2, 1, 6, 7, 4, 3, 0}, /* BLA_FUA */ { 5, 2, 1, 6, 7, 0, 3, 4}, /* BLA_FLC */ { 1, 2, 5, 6, 7, 4, 3, 0}, /* BUC_FUA */ { 1, 2, 5, 6, 7, 0, 3, 4}, /* BUC_FLC */ { 6, 5, 2, 1, 0, 3, 4, 7}, /* BUA_FUC */ { 6, 5, 2, 1, 0, 7, 4, 3}, /* BUA_FRA */ { 5, 6, 1, 2, 3, 0, 7, 4}, /* BDC_FDA */ { 5, 6, 1, 2, 3, 4, 7, 0}, /* BDC_FRC */ { 2, 1, 6, 5, 4, 7, 0, 3}, /* BDA_FDC */ { 2, 1, 6, 5, 4, 3, 0, 7}, /* BDA_FLA */ { 3, 4, 7, 0, 1, 2, 5, 6}, /* FRC_BUC */ { 3, 4, 7, 0, 1, 6, 5, 2}, /* FRC_BRA */ { 0, 7, 4, 3, 2, 1, 6, 5}, /* FRA_BDA */ { 0, 7, 4, 3, 2, 5, 6, 1}, /* FRA_BRC */ { 7, 0, 3, 4, 5, 6, 1, 2}, /* FLC_BDC */ { 7, 0, 3, 4, 5, 2, 1, 6}, /* FLC_BLA */ { 4, 3, 0, 7, 6, 5, 2, 1}, /* FLA_BUA */ { 4, 3, 0, 7, 6, 1, 2, 5}, /* FLA_BLC */ { 0, 3, 4, 7, 6, 5, 2, 1}, /* FUC_BUA */ { 0, 3, 4, 7, 6, 1, 2, 5}, /* FUC_BLC */ { 7, 4, 3, 0, 1, 2, 5, 6}, /* FUA_BUC */ { 7, 4, 3, 0, 1, 6, 5, 2}, /* FUA_BRA */ { 4, 7, 0, 3, 2, 1, 6, 5}, /* FDC_BDA */ { 4, 7, 0, 3, 2, 5, 6, 1}, /* FDC_BRC */ { 3, 0, 7, 4, 5, 6, 1, 2}, /* FDA_BDC */ { 3, 0, 7, 4, 5, 2, 1, 6}, /* FDA_BLA */ }; static long Direction_Sequence[NUM_DIRECTIONS][NSUB] = { { FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA }, /* BRC_FUC */ { FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA, BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC }, /* BRC_FRA */ { FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC, BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC }, /* BRA_FDA */ { FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA }, /* BRA_FRC */ { FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA, BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA }, /* BLC_FDC */ { FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA, BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC }, /* BLC_FLA */ { FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC, BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC }, /* BLA_FUA */ { FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC, BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA }, /* BLA_FLC */ { FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA, BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC }, /* BUC_FUA */ { FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA, BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA }, /* BUC_FLC */ { FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA }, /* BUA_FUC */ { FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC, BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC }, /* BUA_FRA */ { FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA, BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC }, /* BDC_FDA */ { FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA }, /* BDC_FRC */ { FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC, BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA }, /* BDA_FDC */ { FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC, BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC }, /* BDA_FLA */ { BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA }, /* FRC_BUC */ { BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC }, /* FRC_BRA */ { BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC, FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC }, /* FRA_BDA */ { BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC, FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA }, /* FRA_BRC */ { BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA, FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA }, /* FLC_BDC */ { BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA, FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC }, /* FLC_BLA */ { BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC, FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC }, /* FLA_BUA */ { BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC, FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA }, /* FLA_BLC */ { BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC }, /* FUC_BUA */ { BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA }, /* FUC_BLC */ { BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC, FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA }, /* FUA_BUC */ { BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC, FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC }, /* FUA_BRA */ { BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA, FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC }, /* FDC_BDA */ { BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA, FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA }, /* FDC_BRC */ { BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC, FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA }, /* FDA_BDC */ { BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC, FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC }, /* FDA_BLA */ }; int main (int argc, string argv[]) { long c; while ((c = getopt(argc, argv, "h")) != -1) { switch(c) { case 'h': Help(); exit(-1); break; default: fprintf(stderr, "Only valid option is \"-h\".\n"); exit(-1); break; } } Global = NULL; initparam(defv); startrun(); initoutput(); tab_init(); Global->tracktime = 0; Global->partitiontime = 0; Global->treebuildtime = 0; Global->forcecalctime = 0; Global->current_id = 0; CLOCK(Global->computestart); printf("COMPUTESTART = %12lu\n",Global->computestart); CREATE(SlaveStart, NPROC); WAIT_FOR_END(NPROC); CLOCK(Global->computeend); printf("COMPUTEEND = %12lu\n",Global->computeend); printf("COMPUTETIME = %12lu\n",Global->computeend - Global->computestart); printf("TRACKTIME = %12lu\n",Global->tracktime); printf("PARTITIONTIME = %12lu\t%5.2f\n",Global->partitiontime, ((float)Global->partitiontime)/Global->tracktime); printf("TREEBUILDTIME = %12lu\t%5.2f\n",Global->treebuildtime, ((float)Global->treebuildtime)/Global->tracktime); printf("FORCECALCTIME = %12lu\t%5.2f\n",Global->forcecalctime, ((float)Global->forcecalctime)/Global->tracktime); printf("RESTTIME = %12lu\t%5.2f\n", Global->tracktime - Global->partitiontime - Global->treebuildtime - Global->forcecalctime, ((float)(Global->tracktime-Global->partitiontime- Global->treebuildtime-Global->forcecalctime))/ Global->tracktime); MAIN_END; } /* * ANLINIT : initialize ANL macros */ void ANLinit() { MAIN_INITENV(,70000000,); /* Allocate global, shared memory */ Global = (struct GlobalMemory *) G_MALLOC(sizeof(struct GlobalMemory)); if (Global==NULL) error("No initialization for Global\n"); BARINIT(Global->Barrier, NPROC); LOCKINIT(Global->CountLock); LOCKINIT(Global->io_lock); } /* * INIT_ROOT: Processor 0 reinitialize the global root at each time step */ void init_root() { long i; Global->G_root=Local[0].ctab; Global->G_root->seqnum = 0; Type(Global->G_root) = CELL; Done(Global->G_root) = FALSE; Level(Global->G_root) = IMAX >> 1; for (i = 0; i < NSUB; i++) { Subp(Global->G_root)[i] = NULL; } Local[0].mynumcell=1; } long Log_base_2(long number) { long cumulative; long out; cumulative = 1; for (out = 0; out < 20; out++) { if (cumulative == number) { return(out); } else { cumulative = cumulative * 2; } } fprintf(stderr,"Log_base_2: couldn't find log2 of %ld\n", number); exit(-1); } /* * TAB_INIT : allocate body and cell data space */ void tab_init() { long i; /*allocate leaf/cell space */ maxleaf = (long) ((double) fleaves * nbody); maxcell = fcells * maxleaf; for (i = 0; i < NPROC; ++i) { Local[i].ctab = (cellptr) G_MALLOC((maxcell / NPROC) * sizeof(cell)); Local[i].ltab = (leafptr) G_MALLOC((maxleaf / NPROC) * sizeof(leaf)); } /*allocate space for personal lists of body pointers */ maxmybody = (nbody+maxleaf*MAX_BODIES_PER_LEAF)/NPROC; Local[0].mybodytab = (bodyptr*) G_MALLOC(NPROC*maxmybody*sizeof(bodyptr)); /* space is allocated so that every */ /* process can have a maximum of maxmybody pointers to bodies */ /* then there is an array of bodies called bodytab which is */ /* allocated in the distribution generation or when the distr. */ /* file is read */ maxmycell = maxcell / NPROC; maxmyleaf = maxleaf / NPROC; Local[0].mycelltab = (cellptr*) G_MALLOC(NPROC*maxmycell*sizeof(cellptr)); Local[0].myleaftab = (leafptr*) G_MALLOC(NPROC*maxmyleaf*sizeof(leafptr)); CellLock = (struct CellLockType *) G_MALLOC(sizeof(struct CellLockType)); ALOCKINIT(CellLock->CL,MAXLOCK); } /* * SLAVESTART: main task for each processor */ void SlaveStart() { long ProcessId; /* Get unique ProcessId */ LOCK(Global->CountLock); ProcessId = Global->current_id++; UNLOCK(Global->CountLock); BARINCLUDE(Global->Barrier); /* POSSIBLE ENHANCEMENT: Here is where one might pin processes to processors to avoid migration */ /* initialize mybodytabs */ Local[ProcessId].mybodytab = Local[0].mybodytab + (maxmybody * ProcessId); /* note that every process has its own copy */ /* of mybodytab, which was initialized to the */ /* beginning of the whole array by proc. 0 */ /* before create */ Local[ProcessId].mycelltab = Local[0].mycelltab + (maxmycell * ProcessId); Local[ProcessId].myleaftab = Local[0].myleaftab + (maxmyleaf * ProcessId); /* POSSIBLE ENHANCEMENT: Here is where one might distribute the data across physically distributed memories as desired. One way to do this is as follows: long i; if (ProcessId == 0) { for (i=0;iBarstart,NPROC); */ Local[ProcessId].tout = Local[0].tout; Local[ProcessId].tnow = Local[0].tnow; Local[ProcessId].nstep = Local[0].nstep; find_my_initial_bodies(bodytab, nbody, ProcessId); /* main loop */ while (Local[ProcessId].tnow < tstop + 0.1 * dtime) { stepsystem(ProcessId); // printtree(Global->G_root); // printf("Going to next step!!!\n"); } } /* * STARTRUN: startup hierarchical N-body code. */ void startrun() { long seed; infile = getparam("in"); if (*infile != '\0'/*NULL*/) { inputdata(); } else { nbody = getiparam("nbody"); if (nbody < 1) { error("startrun: absurd nbody\n"); } seed = getiparam("seed"); } outfile = getparam("out"); dtime = getdparam("dtime"); dthf = 0.5 * dtime; eps = getdparam("eps"); epssq = eps*eps; tol = getdparam("tol"); tolsq = tol*tol; fcells = getdparam("fcells"); fleaves = getdparam("fleaves"); tstop = getdparam("tstop"); dtout = getdparam("dtout"); NPROC = getiparam("NPROC"); Local[0].nstep = 0; pranset(seed); testdata(); ANLinit(); setbound(); Local[0].tout = Local[0].tnow + dtout; } /* * TESTDATA: generate Plummer model initial conditions for test runs, * scaled to units such that M = -4E = G = 1 (Henon, Hegge, etc). * See Aarseth, SJ, Henon, M, & Wielen, R (1974) Astr & Ap, 37, 183. */ #define MFRAC 0.999 /* mass cut off at MFRAC of total */ void testdata() { real rsc, vsc, r, v, x, y; vector cmr, cmv; register bodyptr p; long rejects = 0; long halfnbody, i; float offset; register bodyptr cp; headline = "Hack code: Plummer model"; Local[0].tnow = 0.0; bodytab = (bodyptr) G_MALLOC(nbody * sizeof(body)); if (bodytab == NULL) { error("testdata: not enough memory\n"); } rsc = 9 * PI / 16; vsc = sqrt(1.0 / rsc); CLRV(cmr); CLRV(cmv); halfnbody = nbody / 2; if (nbody % 2 != 0) halfnbody++; for (p = bodytab; p < bodytab+halfnbody; p++) { Type(p) = BODY; Mass(p) = 1.0 / nbody; Cost(p) = 1; r = 1 / sqrt(pow(xrand(0.0, MFRAC), -2.0/3.0) - 1); /* reject radii greater than 10 */ while (r > 9.0) { rejects++; r = 1 / sqrt(pow(xrand(0.0, MFRAC), -2.0/3.0) - 1); } pickshell(Pos(p), rsc * r); ADDV(cmr, cmr, Pos(p)); do { x = xrand(0.0, 1.0); y = xrand(0.0, 0.1); } while (y > x*x * pow(1 - x*x, 3.5)); v = sqrt(2.0) * x / pow(1 + r*r, 0.25); pickshell(Vel(p), vsc * v); ADDV(cmv, cmv, Vel(p)); } offset = 4.0; for (p = bodytab + halfnbody; p < bodytab+nbody; p++) { Type(p) = BODY; Mass(p) = 1.0 / nbody; Cost(p) = 1; cp = p - halfnbody; for (i = 0; i < NDIM; i++){ Pos(p)[i] = Pos(cp)[i] + offset; Vel(p)[i] = Vel(cp)[i]; } ADDV(cmr, cmr, Pos(p)); ADDV(cmv, cmv, Vel(p)); } DIVVS(cmr, cmr, (real) nbody); DIVVS(cmv, cmv, (real) nbody); for (p = bodytab; p < bodytab+nbody; p++) { SUBV(Pos(p), Pos(p), cmr); SUBV(Vel(p), Vel(p), cmv); } } /* * PICKSHELL: pick a random point on a sphere of specified radius. */ void pickshell(real vec[], real rad) { register long k; double rsq, rsc; do { for (k = 0; k < NDIM; k++) { vec[k] = xrand(-1.0, 1.0); } DOTVP(rsq, vec, vec); } while (rsq > 1.0); rsc = rad / sqrt(rsq); MULVS(vec, vec, rsc); } long intpow(long i, long j) { long k; long temp = 1; for (k = 0; k < j; k++) temp = temp*i; return temp; } /* * STEPSYSTEM: advance N-body system one time-step. */ void stepsystem(long ProcessId) { long i; real Cavg; bodyptr p,*pp; vector dvel, vel1, dpos; long trackstart, trackend; long partitionstart, partitionend; long treebuildstart, treebuildend; long forcecalcstart, forcecalcend; if (Local[ProcessId].nstep == 2) { /* POSSIBLE ENHANCEMENT: Here is where one might reset the statistics that one is measuring about the parallel execution */ } if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) { CLOCK(trackstart); } if (ProcessId == 0) { init_root(); } else { Local[ProcessId].mynumcell = 0; Local[ProcessId].mynumleaf = 0; } /* start at same time */ BARRIER(Global->Barrier,NPROC); if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) { CLOCK(treebuildstart); } /* load bodies into tree */ maketree(ProcessId); if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) { CLOCK(treebuildend); Global->treebuildtime += treebuildend - treebuildstart; } Housekeep(ProcessId); Cavg = (real) Cost(Global->G_root) / (real)NPROC ; Local[ProcessId].workMin = (long) (Cavg * ProcessId); Local[ProcessId].workMax = (long) (Cavg * (ProcessId + 1) + (ProcessId == (NPROC - 1))); if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) { CLOCK(partitionstart); } Local[ProcessId].mynbody = 0; find_my_bodies(Global->G_root, 0, BRC_FUC, ProcessId ); /* B*RRIER(Global->Barcom,NPROC); */ if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) { CLOCK(partitionend); Global->partitiontime += partitionend - partitionstart; } if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) { CLOCK(forcecalcstart); } ComputeForces(ProcessId); if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) { CLOCK(forcecalcend); Global->forcecalctime += forcecalcend - forcecalcstart; } /* advance my bodies */ for (pp = Local[ProcessId].mybodytab; pp < Local[ProcessId].mybodytab+Local[ProcessId].mynbody; pp++) { p = *pp; MULVS(dvel, Acc(p), dthf); ADDV(vel1, Vel(p), dvel); MULVS(dpos, vel1, dtime); ADDV(Pos(p), Pos(p), dpos); ADDV(Vel(p), vel1, dvel); for (i = 0; i < NDIM; i++) { if (Pos(p)[i]Local[ProcessId].max[i]) { Local[ProcessId].max[i]=Pos(p)[i] ; } } } LOCK(Global->CountLock); for (i = 0; i < NDIM; i++) { if (Global->min[i] > Local[ProcessId].min[i]) { Global->min[i] = Local[ProcessId].min[i]; } if (Global->max[i] < Local[ProcessId].max[i]) { Global->max[i] = Local[ProcessId].max[i]; } } UNLOCK(Global->CountLock); /* bar needed to make sure that every process has computed its min */ /* and max coordinates, and has accumulated them into the global */ /* min and max, before the new dimensions are computed */ BARRIER(Global->Barrier,NPROC); if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) { CLOCK(trackend); Global->tracktime += trackend - trackstart; } if (ProcessId==0) { Global->rsize=0; SUBV(Global->max,Global->max,Global->min); for (i = 0; i < NDIM; i++) { if (Global->rsize < Global->max[i]) { Global->rsize = Global->max[i]; } } ADDVS(Global->rmin,Global->min,-Global->rsize/100000.0); Global->rsize = 1.00002*Global->rsize; SETVS(Global->min,1E99); SETVS(Global->max,-1E99); } Local[ProcessId].nstep++; Local[ProcessId].tnow = Local[ProcessId].tnow + dtime; } void ComputeForces(long ProcessId) { bodyptr p,*pp; vector acc1, dacc, dvel; for (pp = Local[ProcessId].mybodytab; pp < Local[ProcessId].mybodytab+Local[ProcessId].mynbody;pp++) { p = *pp; SETV(acc1, Acc(p)); Cost(p)=0; hackgrav(p,ProcessId); Local[ProcessId].myn2bcalc += Local[ProcessId].myn2bterm; Local[ProcessId].mynbccalc += Local[ProcessId].mynbcterm; if (!Local[ProcessId].skipself) { /* did we miss self-int? */ Local[ProcessId].myselfint++; /* count another goofup */ } if (Local[ProcessId].nstep > 0) { /* use change in accel to make 2nd order correction to vel */ SUBV(dacc, Acc(p), acc1); MULVS(dvel, dacc, dthf); ADDV(Vel(p), Vel(p), dvel); } } } /* * FIND_MY_INITIAL_BODIES: puts into mybodytab the initial list of bodies * assigned to the processor. */ void find_my_initial_bodies(bodyptr btab, long nbody, long ProcessId) { long extra,offset,i; Local[ProcessId].mynbody = nbody / NPROC; extra = nbody % NPROC; if (ProcessId < extra) { Local[ProcessId].mynbody++; offset = Local[ProcessId].mynbody * ProcessId; } if (ProcessId >= extra) { offset = (Local[ProcessId].mynbody+1) * extra + (ProcessId - extra) * Local[ProcessId].mynbody; } for (i=0; i < Local[ProcessId].mynbody; i++) { Local[ProcessId].mybodytab[i] = &(btab[offset+i]); } BARRIER(Global->Barrier,NPROC); } void find_my_bodies(nodeptr mycell, long work, long direction, long ProcessId) { long i; leafptr l; nodeptr qptr; if (Type(mycell) == LEAF) { l = (leafptr) mycell; for (i = 0; i < l->num_bodies; i++) { if (work >= Local[ProcessId].workMin - .1) { if((Local[ProcessId].mynbody+2) > maxmybody) { error("find_my_bodies: Processor %ld needs more than %ld bodies; increase fleaves\n", ProcessId, maxmybody); } Local[ProcessId].mybodytab[Local[ProcessId].mynbody++] = Bodyp(l)[i]; } work += Cost(Bodyp(l)[i]); if (work >= Local[ProcessId].workMax-.1) { break; } } } else { for(i = 0; (i < NSUB) && (work < (Local[ProcessId].workMax - .1)); i++){ qptr = Subp(mycell)[Child_Sequence[direction][i]]; if (qptr!=NULL) { if ((work+Cost(qptr)) >= (Local[ProcessId].workMin -.1)) { find_my_bodies(qptr,work, Direction_Sequence[direction][i], ProcessId); } work += Cost(qptr); } } } } /* * HOUSEKEEP: reinitialize the different variables (in particular global * variables) between each time step. */ void Housekeep(long ProcessId) { Local[ProcessId].myn2bcalc = Local[ProcessId].mynbccalc = Local[ProcessId].myselfint = 0; SETVS(Local[ProcessId].min,1E99); SETVS(Local[ProcessId].max,-1E99); } /* * SETBOUND: Compute the initial size of the root of the tree; only done * before first time step, and only processor 0 does it */ void setbound() { long i; real side ; bodyptr p; SETVS(Local[0].min,1E99); SETVS(Local[0].max,-1E99); side=0; for (p = bodytab; p < bodytab+nbody; p++) { for (i=0; iLocal[0].max[i]) Local[0].max[i]=Pos(p)[i] ; } } SUBV(Local[0].max,Local[0].max,Local[0].min); for (i=0; irmin,Local[0].min,-side/100000.0); Global->rsize = 1.00002*side; SETVS(Global->max,-1E99); SETVS(Global->min,1E99); } void Help() { printf("There are a total of twelve parameters, and all of them have default values.\n"); printf("\n"); printf("1) infile (char*) : The name of an input file that contains particle data. \n"); printf(" The format of the file is:\n"); printf("\ta) An int representing the number of particles in the distribution\n"); printf("\tb) An int representing the dimensionality of the problem (3-D)\n"); printf("\tc) A double representing the current time of the simulation\n"); printf("\td) Doubles representing the masses of all the particles\n"); printf("\te) A vector (length equal to the dimensionality) of doubles\n"); printf("\t representing the positions of all the particles\n"); printf("\tf) A vector (length equal to the dimensionality) of doubles\n"); printf("\t representing the velocities of all the particles\n"); printf("\n"); printf(" Each of these numbers can be separated by any amount of whitespace.\n"); printf("\n"); printf("2) nbody (int) : If no input file is specified (the first line is blank), this\n"); printf(" number specifies the number of particles to generate under a plummer model.\n"); printf(" Default is 16384.\n"); printf("\n"); printf("3) seed (int) : The seed used by the random number generator.\n"); printf(" Default is 123.\n"); printf("\n"); printf("4) outfile (char*) : The name of the file that snapshots will be printed to. \n"); printf(" This feature has been disabled in the SPLASH release.\n"); printf(" Default is NULL.\n"); printf("\n"); printf("5) dtime (double) : The integration time-step.\n"); printf(" Default is 0.025.\n"); printf("\n"); printf("6) eps (double) : The usual potential softening\n"); printf(" Default is 0.05.\n"); printf("\n"); printf("7) tol (double) : The cell subdivision tolerance.\n"); printf(" Default is 1.0.\n"); printf("\n"); printf("8) fcells (double) : The total number of cells created is equal to \n"); printf(" fcells * number of leaves.\n"); printf(" Default is 2.0.\n"); printf("\n"); printf("9) fleaves (double) : The total number of leaves created is equal to \n"); printf(" fleaves * nbody.\n"); printf(" Default is 0.5.\n"); printf("\n"); printf("10) tstop (double) : The time to stop integration.\n"); printf(" Default is 0.075.\n"); printf("\n"); printf("11) dtout (double) : The data-output interval.\n"); printf(" Default is 0.25.\n"); printf("\n"); printf("12) NPROC (int) : The number of processors.\n"); printf(" Default is 1.\n"); }