Compare commits
10 commits
3370059668
...
594ab43cd8
Author | SHA1 | Date | |
---|---|---|---|
594ab43cd8 | |||
0f4b39775c | |||
2fcc51c2c1 | |||
|
1470e87527 | ||
|
627f5dabdb | ||
bfcc9d4080 | |||
5af8a0c2af | |||
6f9826e621 | |||
|
f70c734a85 | ||
|
8056b73511 |
293 changed files with 63909 additions and 7 deletions
|
@ -95,7 +95,8 @@ class L1DCache(L1Cache):
|
|||
SimpleOpts.add_option('--l1d_assoc',
|
||||
help="L1 data cache associativity. Default: %s" % assoc)
|
||||
SimpleOpts.add_option('--replacement_policy',
|
||||
help="L1 cache replacement policy. [NMRU,LRU,Random]")
|
||||
help="L1 cache replacement policy. [NMRU,LFU,LIFO,LRU,"
|
||||
"Random,FIFO]")
|
||||
|
||||
def __init__(self, opts=None):
|
||||
super(L1DCache, self).__init__(opts)
|
||||
|
@ -117,6 +118,15 @@ class L1DCache(L1Cache):
|
|||
elif opts.replacement_policy == "LRU":
|
||||
from m5.objects import LRU
|
||||
self.tags = LRU()
|
||||
elif opts.replacement_policy == "LFU":
|
||||
from m5.objects import LFU
|
||||
self.tags = LFU()
|
||||
elif opts.replacement_policy == "LIFO":
|
||||
from m5.objects import LIFO
|
||||
self.tags = LIFO()
|
||||
elif opts.replacement_policy == "FIFO":
|
||||
from m5.objects import FIFO
|
||||
self.tags = FIFO()
|
||||
elif opts.replacement_policy:
|
||||
fatal("Unsupported replacement policy: %s" %
|
||||
opts.replacement_policy)
|
||||
|
|
349
splash2/README.SPLASH2
Normal file
349
splash2/README.SPLASH2
Normal file
|
@ -0,0 +1,349 @@
|
|||
Date: Oct 19, 1994
|
||||
|
||||
This is the directory for the second release of the Stanford Parallel
|
||||
Applications for Shared-Memory (SPLASH-2) programs. For further
|
||||
information contact splash@mojave.stanford.edu.
|
||||
|
||||
PLEASE NOTE: Due to our limited resources, we will be unable to spend
|
||||
much time answering questions about the applications.
|
||||
|
||||
splash.tar contains the tarred version of all the files. Grabbing this
|
||||
file will get you everything you need. We also keep the files
|
||||
individually untarred for partial retrieval. The splash.tar file is not
|
||||
compressed, but the large files in it are. We attempted to compress the
|
||||
splash.tar file to reduce the file size further, but this resulted in
|
||||
a negative compression ratio.
|
||||
|
||||
|
||||
DIFFERENCES BETWEEN SPLASH AND SPLASH-2:
|
||||
----------------------------------------
|
||||
|
||||
The SPLASH-2 suite contains two types of codes: full applications and
|
||||
kernels. Each of the codes utilizes the Argonne National Laboratories
|
||||
(ANL) parmacs macros for parallel constructs. Unlike the codes in the
|
||||
original SPLASH release, each of the codes assumes the use of a
|
||||
"lightweight threads" model (which we hereafter refer to as the "threads"
|
||||
model) in which child processes share the same virtual address space as
|
||||
their parent process. In order for the codes to function correctly,
|
||||
the CREATE macro should call the proper Unix system routine (e.g. "sproc"
|
||||
in the Silicon Graphics IRIX operating system) instead of the "fork"
|
||||
routine that was used for SPLASH. The difference is that processes
|
||||
created with the Unix fork command receive their own private copies of
|
||||
all global variables. In the threads model, child processes share the
|
||||
same virtual address space, and hence all global data. Some of the
|
||||
codes function correctly when the Unix "fork" command is used for child
|
||||
process creation as well. Comments in the code header denote those
|
||||
applications which function correctly with "fork."
|
||||
|
||||
|
||||
MACROS:
|
||||
-------
|
||||
|
||||
Macros for the previous release of the SPLASH application suite can be
|
||||
obtained via anonymous ftp to www-flash.stanford.edu. The macros are
|
||||
contained in the pub/old_splash/splash/macros subdirectory. HOWEVER,
|
||||
THE MACRO FILES MUST BE MODIFIED IN ORDER TO BE USED WITH SPLASH-2 CODES.
|
||||
The CREATE macros must be changed so that they call the proper process
|
||||
creation routine (See DIFFERENCES section above) instead of "fork."
|
||||
|
||||
In this macros subdirectory, macros and sample makefiles are provided
|
||||
for three machines:
|
||||
|
||||
Encore Multimax (CMU Mach 2.5: C and Fortran)
|
||||
SGI 4D/240 (IRIX System V Release 3.3: C only)
|
||||
Alliant FX/8 (Alliant Rev. 5.0: C and Fortran)
|
||||
|
||||
These macros work for us with the above operating systems. Unfortunately,
|
||||
our limited resources prevent us from supporting them in any way or
|
||||
even fielding questions about them. If they don't work for you, please
|
||||
contact Argonne National Labs for a version that will. An e-mail address
|
||||
to try might be monitor-users-request@mcs.anl.gov. An excerpt from
|
||||
a message, received from Argonne, concerning obtaining the macros follows:
|
||||
|
||||
"The parmacs package is in the public domain. Approximately 15 people at
|
||||
Argonne (or associated with Argonne or students) have worked on the
|
||||
parmacs package at one time or another. The parmacs package is
|
||||
implemented via macros using the M4 macropreprocessor (standard on most
|
||||
Unix systems). Current distribution of the software is somewhat ad hoc.
|
||||
Most C versions can be obtained from netlib (send electronic mail to
|
||||
netlib@ornl.gov with the message send index from parmacs). Fortran
|
||||
versions have been emailed directly or sent on tape. The primary
|
||||
documentation for the parmacs package is the book ``Portable Programs for
|
||||
Parallel Processors'' by Lusk, et al, Holt, Rinehart, and Winston 1987."
|
||||
|
||||
The makefiles provided in the individual program directories specify
|
||||
a null macro set that will turn the parallel programs into sequential
|
||||
ones. Note that we do not have a null macro set for FORTRAN.
|
||||
|
||||
|
||||
CODE ENHANCEMENTS:
|
||||
------------------
|
||||
|
||||
All of the codes are designed for shared address space multiprocessors
|
||||
with physically distributed main memory. For these types of machines,
|
||||
process migration and poor data distribution can decrease performance
|
||||
to suboptimal levels. In the applications, comments indicating potential
|
||||
enhancements can be found which will improve performance. Each potential
|
||||
enhancement is denoted by a comment beginning with "POSSIBLE ENHANCEMENT".
|
||||
The potential enhancements which we identify are:
|
||||
|
||||
(1) Data Distribution
|
||||
|
||||
Comments are placed in the code indicating where directives should
|
||||
be placed so that data can be migrated to the local memories of
|
||||
nodes, thus allowing for remote communication to be minimized.
|
||||
|
||||
(2) Process-to-Processor Assignment
|
||||
|
||||
Comments are placed in the code indicating where directives should
|
||||
be placed so that processes can be "pinned" to processors,
|
||||
preventing them from migrating from processor to processor.
|
||||
|
||||
In addition, to facilitate simulation studies, we note points in the
|
||||
codes where statistics gathering routines should be turned on so that
|
||||
cold-start and initialization effects can be avoided.
|
||||
|
||||
As previously mentioned, processes are assumed to be created through calls
|
||||
to a "threads" model creation routine. One important side effect is that
|
||||
this model causes all global variables to be shared (whereas the fork model
|
||||
causes all processes to get their own private copy of global variables).
|
||||
In order to mimic the behavior of global variables in the fork model, many
|
||||
of the applications provide arrays of structures that can be accessed by
|
||||
process ID, such as:
|
||||
|
||||
struct per_process_info {
|
||||
char pad[PAD_LENGTH];
|
||||
unsigned start_time;
|
||||
unsigned end_time;
|
||||
char pad2[PAD_LENGTH];
|
||||
} PPI[MAX_PROCS];
|
||||
|
||||
In these structures, padding is inserted to ensure that the structure
|
||||
information associated with each process can be placed on a different
|
||||
page of memory, and can thus be explicitly migrated to that processor's
|
||||
local memory system. We follow this strategy for certain variables since
|
||||
these data really belong to a process and should be allocated in its local
|
||||
memory. A programming model that had the ability to declare global private
|
||||
data would have automatically ensured that these data were private, and
|
||||
that false sharing did not occur across different structures in the
|
||||
array. However, since the threads model does not provide this capability,
|
||||
it is provided by explicitly introducing arrays of structures with padding.
|
||||
The padding constants used in the programs (PAD_LENGTH in this example)
|
||||
can easily be changed to suit the particular characteristics of a given
|
||||
system. The actual data that is manipulated by individual applications
|
||||
(e.g. grid points, particle data, etc) is not padded, however.
|
||||
|
||||
Finally, for some applications we provide less-optimized versions of the
|
||||
codes. The less-optimized versions utilize data structures that lead to
|
||||
simpler implementations, but which do not allow for optimal data
|
||||
distribution (and can thus generate false-sharing).
|
||||
|
||||
|
||||
REPORT:
|
||||
-------
|
||||
|
||||
A report will be put together shortly describing the structure, function,
|
||||
and performance characteristics of each application. The report will be
|
||||
similar to the original SPLASH report (see the original report for the
|
||||
issues discussed). The report will provide quantitative data (for two
|
||||
different cache line sizes) for characteristics such as working set size
|
||||
and miss rates (local versus remote, etc.). In addition, the report
|
||||
will discuss cache behavior and synchronization behavior of the
|
||||
applications as well. In the meantime, each application directory has
|
||||
a README file that describes how to run each application. In addition,
|
||||
most applications have comments in their headers describing how to run
|
||||
each application.
|
||||
|
||||
|
||||
README FILES:
|
||||
-------------
|
||||
|
||||
Each application has an associated README file. It is VERY important to
|
||||
read these files carefully, as they discuss the important parameters to
|
||||
supply for each application, as well as other issues involved in running
|
||||
the programs. In each README file, we discuss the impact of explicitly
|
||||
distributing data on the Stanford DASH Multiprocessor. Unless otherwise
|
||||
specified, we assume that the default data distribution mechanism is
|
||||
through round-robin page allocation.
|
||||
|
||||
|
||||
PROBLEM SIZES:
|
||||
--------------
|
||||
|
||||
For each application, the README file describes a recommended problem
|
||||
size that is a reasonable base problem size that both can be simulated
|
||||
and is not too small for reality on a machine with up to 64 processors.
|
||||
For the purposes of studying algorithm performance, the parameters
|
||||
associated with each application can be varied. However, for the
|
||||
purposes of comparing machine architectures, the README files describe
|
||||
which parameters can be varied, and which should remain constant (or at
|
||||
their default values) for comparability. If the specific "base"
|
||||
parameters that are specified are not used, then results which are
|
||||
reported should explicitly state which parameters were changed, what
|
||||
their new values are, and address why they were changed.
|
||||
|
||||
|
||||
CORE PROGRAMS:
|
||||
--------------
|
||||
|
||||
Since the number of programs has increased over SPLASH, and since not
|
||||
everyone may be able to use all the programs in a given study, we
|
||||
identify some of the programs as "core" programs that should be used
|
||||
in most studies for comparability. In the currently available set, these
|
||||
core programs include:
|
||||
|
||||
(1) Ocean Simulation
|
||||
(2) Hierarchical Radiosity
|
||||
(3) Water Simulation with Spatial data structure
|
||||
(4) Barnes-Hut
|
||||
(5) FFT
|
||||
(6) Blocked Sparse Cholesky Factorization
|
||||
(7) Radix Sort
|
||||
|
||||
The less optimized versions of the programs, when provided, should be
|
||||
used only in addition to these.
|
||||
|
||||
|
||||
MAILING LIST:
|
||||
-------------
|
||||
|
||||
Please send a note to splash@mojave.stanford.edu if you have copied over
|
||||
the programs, so that we can put you on a mailing list for update reports.
|
||||
|
||||
|
||||
AUTHORSHIP:
|
||||
-----------
|
||||
|
||||
The applications provided in the SPLASH-2 suite were developed by a number
|
||||
of people. The report lists authors primarily responsible for the
|
||||
development of each application code. The codes were made ready for
|
||||
distribution and the README files were prepared by Steven Cameron Woo and
|
||||
Jaswinder Pal Singh.
|
||||
|
||||
|
||||
CODE CHANGES:
|
||||
-------------
|
||||
|
||||
If modifications are made to the codes which improve their performance,
|
||||
we would like to hear about them. Please send email to
|
||||
splash@mojave.stanford.edu detailing the changes.
|
||||
|
||||
|
||||
UPDATE REPORTS:
|
||||
---------------
|
||||
|
||||
Watch this file for information regarding changes to codes and additions
|
||||
to the application suite.
|
||||
|
||||
|
||||
CHANGES:
|
||||
-------
|
||||
|
||||
10-21-94: Ocean code, contiguous partitions, line 247 of slave1.C changed
|
||||
from
|
||||
|
||||
t2a[0][0] = hh3*t2a[0][0]+hh1*psi[procid][1][0][0];
|
||||
|
||||
to
|
||||
|
||||
t2a[0][0] = hh3*t2a[0][0]+hh1*t2c[0][0];
|
||||
|
||||
This change does not affect correctness; it is an optimization
|
||||
that was performed elsewhere in the code but overlooked here.
|
||||
|
||||
11-01-94: Barnes, file code_io.C, line 55 changed from
|
||||
|
||||
in_real(instr, tnow);
|
||||
|
||||
to
|
||||
|
||||
in_real(instr, &tnow);
|
||||
|
||||
11-01-94: Raytrace, file main.C, lines 216-223 changed from
|
||||
|
||||
if ((pid == 0) || (dostats))
|
||||
CLOCK(end);
|
||||
|
||||
gm->partime[0] = (end - begin) & 0x7FFFFFFF;
|
||||
if (pid == 0) gm->par_start_time = begin;
|
||||
|
||||
/* printf("Process %ld elapsed time %lu.\n", pid, lapsed); */
|
||||
|
||||
}
|
||||
|
||||
to
|
||||
|
||||
if ((pid == 0) || (dostats)) {
|
||||
CLOCK(end);
|
||||
gm->partime[pid] = (end - begin) & 0x7FFFFFFF;
|
||||
if (pid == 0) gm->par_start_time = begin;
|
||||
}
|
||||
|
||||
11-13-94: Raytrace, file memory.C
|
||||
|
||||
The use of the word MAIN_INITENV in a comment in memory.C causes
|
||||
m4 to expand this macro, and some implementations may get confused
|
||||
and generate the wrong C code.
|
||||
|
||||
11-13-94: Radiosity, file rad_main.C
|
||||
|
||||
rad_main.C uses the macro CREATE_LITE. All three instances of
|
||||
CREATE_LITE should be changed to CREATE.
|
||||
|
||||
11-13-94: Water-spatial and Water-nsquared, file makefile
|
||||
|
||||
makefiles were changed so that the compilation phases included the
|
||||
CFLAGS options instead of the CCOPTS options, which did not exist.
|
||||
|
||||
11-17-94: FMM, file particle.C
|
||||
|
||||
Comment regarding data distribution of particle_array data
|
||||
structure is incorrect. Round-robin allocation should be used.
|
||||
|
||||
11-18-94: OCEAN, contiguous partitions, files main.C and linkup.C
|
||||
|
||||
Eliminated a problem which caused non-doubleword aligned
|
||||
accesses to doublewords for the uniprocessor case.
|
||||
|
||||
main.C: Added lines 467-471:
|
||||
|
||||
if (nprocs%2 == 1) { /* To make sure that the actual data
|
||||
starts double word aligned, add an extra
|
||||
pointer */
|
||||
d_size += sizeof(double ***);
|
||||
}
|
||||
|
||||
Added same lines in file linkup.C at line numbers 100 and 159.
|
||||
|
||||
07-30-95: RADIX has been changed. A tree-structured parallel prefix
|
||||
computation is now used instead of a linear one.
|
||||
|
||||
LU has been modified. A comment describing how to distribute
|
||||
data (one of the POSSIBLE ENHANCEMENTS) was incorrect for the
|
||||
contiguous_blocks version of LU. Also, a modification was made
|
||||
that reduces false sharing at line 206 of lu.C:
|
||||
|
||||
last_malloc[i] = (double *) (((unsigned) last_malloc[i]) + PAGE_SIZE -
|
||||
((unsigned) last_malloc[i]) % PAGE_SIZE);
|
||||
|
||||
A subdirectory shmem_files was added under the codes directory.
|
||||
This directory contains a file that can be compiled on SGI machines
|
||||
which replaces the libsgi.a file distributed in the original SPLASH
|
||||
release.
|
||||
|
||||
09-26-95: Fixed a bug in LU. Line 201 was changed from
|
||||
|
||||
last_malloc[i] = (double *) G_MALLOC(proc_bytes[i])
|
||||
|
||||
to
|
||||
|
||||
last_malloc[i] = (double *) G_MALLOC(proc_bytes[i] + PAGE_SIZE)
|
||||
|
||||
Fixed similar bugs in WATER-NSQUARED and WATER-SPATIAL. Both
|
||||
codes needed a barrier added into the mdmain.C files. In both
|
||||
codes, the line
|
||||
|
||||
BARRIER(gl->start, NumProcs);
|
||||
|
||||
was added. In WATER-NSQUARED, it was added in mdmain.C at line
|
||||
84. In WATER-SPATIAL, it was added in mdmain.C at line 107.
|
124
splash2/SPLASH2.POSTING
Normal file
124
splash2/SPLASH2.POSTING
Normal file
|
@ -0,0 +1,124 @@
|
|||
We are pleased to announce the release of the SPLASH-2 suite of
|
||||
multiprocessor applications. SPLASH-2 is the successor to the SPLASH
|
||||
suite that we previously released, and the programs in it are also
|
||||
written assuming a coherent shared address space communication model.
|
||||
SPLASH-2 contains several new applications, as well as improved versions
|
||||
of applications from SPLASH. The suite is currently available via
|
||||
anonymous ftp to
|
||||
|
||||
www-flash.stanford.edu (in the pub/splash2 subdirectory)
|
||||
|
||||
and via the World-Wide-Web at
|
||||
|
||||
http://www-flash.stanford.edu/apps/SPLASH/
|
||||
|
||||
Several programs are currently available, and a few others will be added
|
||||
shortly. The programs fall into two categories: full applications and
|
||||
kernels. Additionally, we designate some of these as "core programs"
|
||||
(see below). The applications and kernels currently available in the
|
||||
SPLASH-2 suite include:
|
||||
|
||||
Applications:
|
||||
Ocean Simulation
|
||||
Ray Tracer
|
||||
Hierarchical Radiosity
|
||||
Volume Renderer
|
||||
Water Simulation with Spatial Data Structure
|
||||
Water Simulation without Spatial Data Structure
|
||||
Barnes-Hut (gravitational N-body simulation)
|
||||
Adaptive Fast Multipole (gravitational N-body simulation)
|
||||
|
||||
Kernels:
|
||||
FFT
|
||||
Blocked LU Decomposition
|
||||
Blocked Sparse Cholesky Factorization
|
||||
Radix Sort
|
||||
|
||||
Programs that will appear soon include:
|
||||
|
||||
PSIM4 - Particle Dynamics Simulation (full application)
|
||||
Conjugate Gradient (kernel)
|
||||
LocusRoute (standard cell router from SPLASH)
|
||||
Protein Structure Prediction
|
||||
Protein Sequencing
|
||||
Parallel Probabilistic Inference
|
||||
|
||||
In some cases, we provide both well-optimized and less-optimized versions
|
||||
of the programs. For both the Ocean simulation and the Blocked LU
|
||||
Decomposition kernel, less optimized versions of the codes are currently
|
||||
available.
|
||||
|
||||
There are important differences between applications in the SPLASH-2 suite
|
||||
and applications in the SPLASH suite. These differences are noted in the
|
||||
README.SPLASH2 file in the pub/splash2 directory. It is *VERY IMPORTANT*
|
||||
that you read the README.SPLASH2 file, as well as the individual README
|
||||
files in the program directories, before using the SPLASH-2 programs.
|
||||
These files describe how to run the programs, provide commented annotations
|
||||
about how to distribute data on a machine with physically distributed main
|
||||
memory, and provide guidelines on the baseline problem sizes to use when
|
||||
studying architectural interactions through simulation.
|
||||
|
||||
Complete documentation of SPLASH2, including a detailed characterization
|
||||
of performance as well as memory system interactions and synchronization
|
||||
behavior, will appear in the SPLASH2 report that is currently being
|
||||
written.
|
||||
|
||||
|
||||
OPTIMIZATION STRATEGY:
|
||||
----------------------
|
||||
|
||||
For each application and kernel, we note potential features or
|
||||
enhancements that are typically machine-specific. These potential
|
||||
enhancements are encapsulated within comments in the code starting with
|
||||
the string "POSSIBLE ENHANCEMENT." The potential enhancements which we
|
||||
identify are:
|
||||
|
||||
(1) Data Distribution
|
||||
|
||||
We note where data migration routines should be called in order to
|
||||
enhance locality of data access. We do not distribute data by
|
||||
default as different machines implement migration routines in
|
||||
different ways, and on some machines this is not relevant.
|
||||
|
||||
(2) Process-to-Processor Assignment
|
||||
|
||||
We note where calls can be made to "pin" processes to specific
|
||||
processors so that process migration can be avoided. We do not
|
||||
do this by default, since different machines implement this
|
||||
feature in different ways.
|
||||
|
||||
In addition, to facilitate simulation studies, we note points in the
|
||||
codes where statistics gathering routines should be turned on so that
|
||||
cold-start and initialization effects can be avoided.
|
||||
|
||||
For two programs (Ocean and LU), we provide less-optimized versions of
|
||||
the codes. The less-optimized versions utilize data structures that
|
||||
lead to simpler implementations, but which do not allow for optimal data
|
||||
distribution (and can generate false-sharing).
|
||||
|
||||
|
||||
CORE PROGRAMS:
|
||||
--------------
|
||||
|
||||
Since the number of programs has increased over SPLASH, and since not
|
||||
everyone may be able to use all the programs in a given study, we
|
||||
identify some of the programs as "core" programs that should be used
|
||||
in most studies for comparability. In the currently available set,
|
||||
these core programs include:
|
||||
|
||||
(1) Ocean Simulation
|
||||
(2) Hierarchical Radiosity
|
||||
(3) Water Simulation with Spatial data structure
|
||||
(4) Barnes-Hut
|
||||
(5) FFT
|
||||
(6) Blocked Sparse Cholesky Factorization
|
||||
(7) Radix Sort
|
||||
|
||||
The less optimized versions of the programs, when available, should be
|
||||
used only in addition to these.
|
||||
|
||||
The base problem sizes that we recommend are provided in the README files
|
||||
for individual applications. Please use at least these for experiments
|
||||
with up to 64 processors. If changes are made to these base parameters
|
||||
for further experimentation, these changes should be explicitly stated
|
||||
in any results that are presented.
|
34
splash2/codes/Makefile.config
Normal file
34
splash2/codes/Makefile.config
Normal file
|
@ -0,0 +1,34 @@
|
|||
# Shared build settings and suffix rules for the SPLASH-2 codes.
# Each program Makefile sets TARGET and OBJS and then includes this file.

# Compiler used for both compiling and linking.
CC := gcc
|
||||
# Default build: statically linked, optimized, with pthread support.
CFLAGS := -static -O3 -pthread -D_POSIX_C_SOURCE=200112
|
||||
# Alternative debug build (swap with the line above to use it).
#CFLAGS := -g3 -pthread -D_POSIX_C_SOURCE=200112
|
||||
# Extra warnings appended to whichever CFLAGS base is active.
CFLAGS := $(CFLAGS) -Wall -W -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls -Wdisabled-optimization
|
||||
CFLAGS := $(CFLAGS) -Wpadded -Winline -Wpointer-arith -Wsign-compare -Wendif-labels
|
||||
# libpthread.a and m5op_x86.o are given as bare file names, so they are
# looked up relative to the directory in which make is run.
# NOTE(review): m5op_x86.o is presumably the gem5 m5-op object -- confirm
# where it is expected to come from.
LDFLAGS := -lm libpthread.a m5op_x86.o
|
||||
|
||||
# Root of the SPLASH-2 codes tree. This is hard-coded to one checkout
# location under $(HOME); adjust it if the tree lives elsewhere.
BASEDIR := $(HOME)/GitSource/gem5/splash2/codes
|
||||
# m4 macro definitions applied to every .C/.H source.
MACROS := $(BASEDIR)/null_macros/c.m4.null.POSIX
|
||||
# -s emits #line sync directives; -Ulen/-Uindex undefine those m4 builtins
# so the words "len" and "index" in the sources are left untouched.
M4 := m4 -s -Ulen -Uindex
|
||||
|
||||
# NOTE(review): x is not referenced anywhere in this file; purpose unclear.
x = *
|
||||
|
||||
# Link the program from its object files.
$(TARGET): $(OBJS)
|
||||
$(CC) $(OBJS) $(CFLAGS) -o $(TARGET) $(LDFLAGS)
|
||||
|
||||
# Removes ALL .c and .h files in the directory, not just object files:
# the rules below generate them from .C/.H, so hand-written .c/.h files
# placed here would be deleted too.
clean:
|
||||
rm -rf *.c *.h *.o $(TARGET)
|
||||
|
||||
# Clear the built-in suffix list, then declare only the suffixes used here.
.SUFFIXES:
|
||||
.SUFFIXES: .o .c .C .h .H
|
||||
|
||||
# Expand m4 macros in a .H file to produce the .h header.
.H.h:
|
||||
$(M4) $(MACROS) $*.H > $*.h
|
||||
|
||||
# Expand m4 macros in a .C file to produce the .c source.
.C.c:
|
||||
$(M4) $(MACROS) $*.C > $*.c
|
||||
|
||||
# Compile a generated .c file.
.c.o:
|
||||
$(CC) -c $(CFLAGS) $*.c
|
||||
|
||||
# Build an object straight from a .C file: run m4, then compile the result.
.C.o:
|
||||
$(M4) $(MACROS) $*.C > $*.c
|
||||
|
||||
$(CC) -c $(CFLAGS) $*.c
|
13
splash2/codes/apps/barnes/Makefile
Normal file
13
splash2/codes/apps/barnes/Makefile
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Build description for the BARNES program.
# TARGET and OBJS are consumed by the link rule in the included
# Makefile.config, so they are set before the include.
TARGET = BARNES
|
||||
OBJS = code.o code_io.o load.o grav.o getparam.o util.o
|
||||
|
||||
# Shared compiler settings and the .H -> .h / .C -> .o suffix rules.
include ../../Makefile.config
|
||||
|
||||
# stdinc.h is rebuilt from stdinc.H whenever any of the listed headers
# changes.
stdinc.h: code.h defs.h util.h vectmath.h load.h code_io.h grav.h getparam.h stdinc.H
|
||||
# Every object depends on its own .C source and on stdinc.h.
code.o: code.C stdinc.h
|
||||
code_io.o: code_io.C stdinc.h
|
||||
getparam.o: getparam.C stdinc.h
|
||||
grav.o: grav.C stdinc.h
|
||||
load.o: load.C stdinc.h
|
||||
util.o: util.C stdinc.h
|
||||
|
50
splash2/codes/apps/barnes/README.barnes
Normal file
50
splash2/codes/apps/barnes/README.barnes
Normal file
|
@ -0,0 +1,50 @@
|
|||
GENERAL INFORMATION:
|
||||
|
||||
The BARNES application implements the Barnes-Hut method to simulate the
|
||||
interaction of a system of bodies (N-body problem). A general description
|
||||
of the Barnes-Hut method can be found in:
|
||||
|
||||
Singh, J. P. Parallel Hierarchical N-body Methods and Their Implications
|
||||
for Multiprocessors. PhD Thesis, Stanford University, February 1993.
|
||||
|
||||
The SPLASH-2 implementation allows for multiple particles to be stored in
|
||||
each leaf cell of the space partition. A description of this feature
|
||||
can be found in:
|
||||
|
||||
Holt, C. and Singh, J. P. Hierarchical N-Body Methods on Shared Address
|
||||
Space Multiprocessors. SIAM Conference on Parallel Processing
|
||||
for Scientific Computing, Feb 1995, to appear.
|
||||
|
||||
RUNNING THE PROGRAM:
|
||||
|
||||
To see how to run the program, please see the comment at the top of the
|
||||
file code.C, or run the application with the "-h" command line option.
|
||||
The input parameters should be placed in a file and redirected to standard
|
||||
input. Of the twelve input parameters, the ones which would normally be
|
||||
varied are the number of particles and the number of processors. If other
|
||||
parameters are changed, these changes should be reported in any results
|
||||
that are presented.
|
||||
|
||||
The only compile-time option, -DQUADPOLE, controls the use of quadrupole
|
||||
interactions during the force computation. For the input parameters
|
||||
provided, the -DQUADPOLE option should not be defined. The constant
|
||||
MAX_BODIES_PER_LEAF defines the maximum number of particles per leaf
|
||||
cell in the tree. This constant also affects the parameter "fleaves" in
|
||||
the input file, which controls how many leaf cells space is allocated for.
|
||||
The higher the value of MAX_BODIES_PER_LEAF, the lower fleaves should be.
|
||||
Both these parameters should be kept at their default values for base
|
||||
SPLASH-2 runs. If changes are made, they should be reported in any results
|
||||
that are presented.
|
||||
|
||||
BASE PROBLEM SIZE:
|
||||
|
||||
The base problem size for a machine with up to 64 processors is 16384 particles.
|
||||
For this many particles, you can use the input file provided (and change
|
||||
only the number of processors).
|
||||
|
||||
DATA DISTRIBUTION:
|
||||
|
||||
Our "POSSIBLE ENHANCEMENT" comments in the source code tell where one
|
||||
might want to distribute data and how. Data distribution, however, does
|
||||
not make much difference to performance on the Stanford DASH
|
||||
multiprocessor.
|
922
splash2/codes/apps/barnes/code.C
Normal file
922
splash2/codes/apps/barnes/code.C
Normal file
|
@ -0,0 +1,922 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
/*
|
||||
Usage: BARNES <options> < inputfile
|
||||
|
||||
Command line options:
|
||||
|
||||
-h : Print out input file description
|
||||
|
||||
Input parameters should be placed in a file and redirected through
|
||||
standard input. There are a total of twelve parameters, and all of
|
||||
them have default values.
|
||||
|
||||
1) infile (char*) : The name of an input file that contains particle
|
||||
data.
|
||||
|
||||
The format of the file is:
|
||||
a) An int representing the number of particles in the distribution
|
||||
b) An int representing the dimensionality of the problem (3-D)
|
||||
c) A double representing the current time of the simulation
|
||||
d) Doubles representing the masses of all the particles
|
||||
e) A vector (length equal to the dimensionality) of doubles
|
||||
representing the positions of all the particles
|
||||
f) A vector (length equal to the dimensionality) of doubles
|
||||
representing the velocities of all the particles
|
||||
|
||||
Each of these numbers can be separated by any amount of whitespace.
|
||||
2) nbody (int) : If no input file is specified (the first line is
|
||||
blank), this number specifies the number of particles to generate
|
||||
under a plummer model. Default is 16384.
|
||||
3) seed (int) : The seed used by the random number generator.
|
||||
Default is 123.
|
||||
4) outfile (char*) : The name of the file that snapshots will be
|
||||
printed to. This feature has been disabled in the SPLASH release.
|
||||
Default is NULL.
|
||||
5) dtime (double) : The integration time-step.
|
||||
Default is 0.025.
|
||||
6) eps (double) : The usual potential softening
|
||||
Default is 0.05.
|
||||
7) tol (double) : The cell subdivision tolerance.
|
||||
Default is 1.0.
|
||||
8) fcells (double) : Number of cells created = fcells * number of
|
||||
leaves.
|
||||
Default is 2.0.
|
||||
9) fleaves (double) : Number of leaves created = fleaves * nbody.
|
||||
Default is 0.5.
|
||||
10) tstop (double) : The time to stop integration.
|
||||
Default is 0.075.
|
||||
11) dtout (double) : The data-output interval.
|
||||
Default is 0.25.
|
||||
12) NPROC (int) : The number of processors.
|
||||
Default is 1.
|
||||
*/
|
||||
|
||||
MAIN_ENV
|
||||
|
||||
#define global /* nada */
|
||||
|
||||
#include "stdinc.h"
|
||||
|
||||
/*
 * Built-in default for each of the twelve input parameters, stored as
 * "name=value" strings. "in=" and "out=" have empty values.
 *
 * NOTE(review): the list has no terminating NULL entry. If the lookup code
 * (not visible here) scans for a NULL sentinel, a lookup of an unknown name
 * would read past the end of the array -- confirm against the consumer.
 */
string defv[] = { /* DEFAULT PARAMETER VALUES */
|
||||
/* file names for input/output */
|
||||
"in=", /* snapshot of initial conditions */
|
||||
"out=", /* stream of output snapshots */
|
||||
|
||||
/* params, used if no input specified, to make a Plummer Model */
|
||||
"nbody=16384", /* number of particles to generate */
|
||||
"seed=123", /* random number generator seed */
|
||||
|
||||
/* params to control N-body integration */
|
||||
"dtime=0.025", /* integration time-step */
|
||||
"eps=0.05", /* usual potential softening */
|
||||
"tol=1.0", /* cell subdivision tolerance */
|
||||
"fcells=2.0", /* cell allocation parameter */
|
||||
"fleaves=0.5", /* leaf allocation parameter */
|
||||
|
||||
"tstop=0.075", /* time to stop integration */
|
||||
"dtout=0.25", /* data-output interval */
|
||||
|
||||
"NPROC=1", /* number of processors */
|
||||
};
|
||||
|
||||
/* The more complicated 3D case: each constant below is the row index of the matching entry in Child_Sequence */
|
||||
#define NUM_DIRECTIONS 32
|
||||
#define BRC_FUC 0
|
||||
#define BRC_FRA 1
|
||||
#define BRA_FDA 2
|
||||
#define BRA_FRC 3
|
||||
#define BLC_FDC 4
|
||||
#define BLC_FLA 5
|
||||
#define BLA_FUA 6
|
||||
#define BLA_FLC 7
|
||||
#define BUC_FUA 8
|
||||
#define BUC_FLC 9
|
||||
#define BUA_FUC 10
|
||||
#define BUA_FRA 11
|
||||
#define BDC_FDA 12
|
||||
#define BDC_FRC 13
|
||||
#define BDA_FDC 14
|
||||
#define BDA_FLA 15
|
||||
|
||||
#define FRC_BUC 16
|
||||
#define FRC_BRA 17
|
||||
#define FRA_BDA 18
|
||||
#define FRA_BRC 19
|
||||
#define FLC_BDC 20
|
||||
#define FLC_BLA 21
|
||||
#define FLA_BUA 22
|
||||
#define FLA_BLC 23
|
||||
#define FUC_BUA 24
|
||||
#define FUC_BLC 25
|
||||
#define FUA_BUC 26
|
||||
#define FUA_BRA 27
|
||||
#define FDC_BDA 28
|
||||
#define FDC_BRC 29
|
||||
#define FDA_BDC 30
|
||||
#define FDA_BLA 31
|
||||
|
||||
/*
 * CHILD_SEQUENCE: for each direction code (row), the order in which the
 * NSUB sub-cell slots of a cell are visited by find_my_bodies().
 * The table is read-only, hence const.
 */
static const long Child_Sequence[NUM_DIRECTIONS][NSUB] =
{
    { 2, 5, 6, 1, 0, 3, 4, 7},  /* BRC_FUC */
    { 2, 5, 6, 1, 0, 7, 4, 3},  /* BRC_FRA */
    { 1, 6, 5, 2, 3, 0, 7, 4},  /* BRA_FDA */
    { 1, 6, 5, 2, 3, 4, 7, 0},  /* BRA_FRC */
    { 6, 1, 2, 5, 4, 7, 0, 3},  /* BLC_FDC */
    { 6, 1, 2, 5, 4, 3, 0, 7},  /* BLC_FLA */
    { 5, 2, 1, 6, 7, 4, 3, 0},  /* BLA_FUA */
    { 5, 2, 1, 6, 7, 0, 3, 4},  /* BLA_FLC */
    { 1, 2, 5, 6, 7, 4, 3, 0},  /* BUC_FUA */
    { 1, 2, 5, 6, 7, 0, 3, 4},  /* BUC_FLC */
    { 6, 5, 2, 1, 0, 3, 4, 7},  /* BUA_FUC */
    { 6, 5, 2, 1, 0, 7, 4, 3},  /* BUA_FRA */
    { 5, 6, 1, 2, 3, 0, 7, 4},  /* BDC_FDA */
    { 5, 6, 1, 2, 3, 4, 7, 0},  /* BDC_FRC */
    { 2, 1, 6, 5, 4, 7, 0, 3},  /* BDA_FDC */
    { 2, 1, 6, 5, 4, 3, 0, 7},  /* BDA_FLA */

    { 3, 4, 7, 0, 1, 2, 5, 6},  /* FRC_BUC */
    { 3, 4, 7, 0, 1, 6, 5, 2},  /* FRC_BRA */
    { 0, 7, 4, 3, 2, 1, 6, 5},  /* FRA_BDA */
    { 0, 7, 4, 3, 2, 5, 6, 1},  /* FRA_BRC */
    { 7, 0, 3, 4, 5, 6, 1, 2},  /* FLC_BDC */
    { 7, 0, 3, 4, 5, 2, 1, 6},  /* FLC_BLA */
    { 4, 3, 0, 7, 6, 5, 2, 1},  /* FLA_BUA */
    { 4, 3, 0, 7, 6, 1, 2, 5},  /* FLA_BLC */
    { 0, 3, 4, 7, 6, 5, 2, 1},  /* FUC_BUA */
    { 0, 3, 4, 7, 6, 1, 2, 5},  /* FUC_BLC */
    { 7, 4, 3, 0, 1, 2, 5, 6},  /* FUA_BUC */
    { 7, 4, 3, 0, 1, 6, 5, 2},  /* FUA_BRA */
    { 4, 7, 0, 3, 2, 1, 6, 5},  /* FDC_BDA */
    { 4, 7, 0, 3, 2, 5, 6, 1},  /* FDC_BRC */
    { 3, 0, 7, 4, 5, 6, 1, 2},  /* FDA_BDC */
    { 3, 0, 7, 4, 5, 2, 1, 6},  /* FDA_BLA */
};
|
||||
|
||||
static long Direction_Sequence[NUM_DIRECTIONS][NSUB] =
|
||||
{
|
||||
{ FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
|
||||
/* BRC_FUC */
|
||||
{ FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA, BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC },
|
||||
/* BRC_FRA */
|
||||
{ FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC, BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC },
|
||||
/* BRA_FDA */
|
||||
{ FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
|
||||
/* BRA_FRC */
|
||||
{ FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA, BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA },
|
||||
/* BLC_FDC */
|
||||
{ FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA, BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC },
|
||||
/* BLC_FLA */
|
||||
{ FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC, BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC },
|
||||
/* BLA_FUA */
|
||||
{ FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC, BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA },
|
||||
/* BLA_FLC */
|
||||
{ FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA, BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC },
|
||||
/* BUC_FUA */
|
||||
{ FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA, BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA },
|
||||
/* BUC_FLC */
|
||||
{ FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
|
||||
/* BUA_FUC */
|
||||
{ FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC, BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC },
|
||||
/* BUA_FRA */
|
||||
{ FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA, BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC },
|
||||
/* BDC_FDA */
|
||||
{ FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
|
||||
/* BDC_FRC */
|
||||
{ FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC, BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA },
|
||||
/* BDA_FDC */
|
||||
{ FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC, BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC },
|
||||
/* BDA_FLA */
|
||||
|
||||
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA },
|
||||
/* FRC_BUC */
|
||||
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC },
|
||||
/* FRC_BRA */
|
||||
{ BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC, FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC },
|
||||
/* FRA_BDA */
|
||||
{ BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC, FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA },
|
||||
/* FRA_BRC */
|
||||
{ BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA, FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA },
|
||||
/* FLC_BDC */
|
||||
{ BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA, FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC },
|
||||
/* FLC_BLA */
|
||||
{ BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC, FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC },
|
||||
/* FLA_BUA */
|
||||
{ BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC, FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA },
|
||||
/* FLA_BLC */
|
||||
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC },
|
||||
/* FUC_BUA */
|
||||
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA },
|
||||
/* FUC_BLC */
|
||||
{ BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC, FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA },
|
||||
/* FUA_BUC */
|
||||
{ BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC, FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC },
|
||||
/* FUA_BRA */
|
||||
{ BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA, FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC },
|
||||
/* FDC_BDA */
|
||||
{ BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA, FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA },
|
||||
/* FDC_BRC */
|
||||
{ BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC, FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA },
|
||||
/* FDA_BDC */
|
||||
{ BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC, FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC },
|
||||
/* FDA_BLA */
|
||||
};
|
||||
|
||||
/*
 * MAIN: parse the command line, set the run up, start the worker
 * processes and print the timing summary once they have all finished.
 */
int main (int argc, string argv[])
{
    long opt;
    unsigned long resttime;

    /* Any option at all ends the run: "-h" prints the help text first, */
    /* everything else is reported as a usage error.                    */
    opt = getopt(argc, argv, "h");
    if (opt != -1) {
        if (opt == 'h') {
            Help();
        }
        else {
            fprintf(stderr, "Only valid option is \"-h\".\n");
        }
        exit(-1);
    }

    Global = NULL;
    initparam(defv);
    startrun();
    initoutput();
    tab_init();

    /* Reset the shared timers and the processor id counter. */
    Global->tracktime = 0;
    Global->partitiontime = 0;
    Global->treebuildtime = 0;
    Global->forcecalctime = 0;
    Global->current_id = 0;

    CLOCK(Global->computestart);

    printf("COMPUTESTART = %12lu\n",Global->computestart);

    CREATE(SlaveStart, NPROC);

    WAIT_FOR_END(NPROC);

    CLOCK(Global->computeend);

    printf("COMPUTEEND = %12lu\n",Global->computeend);
    printf("COMPUTETIME = %12lu\n",Global->computeend - Global->computestart);
    printf("TRACKTIME = %12lu\n",Global->tracktime);
    printf("PARTITIONTIME = %12lu\t%5.2f\n",Global->partitiontime,
           ((float)Global->partitiontime)/Global->tracktime);
    printf("TREEBUILDTIME = %12lu\t%5.2f\n",Global->treebuildtime,
           ((float)Global->treebuildtime)/Global->tracktime);
    printf("FORCECALCTIME = %12lu\t%5.2f\n",Global->forcecalctime,
           ((float)Global->forcecalctime)/Global->tracktime);

    /* Whatever part of the tracked time is not attributed to a phase. */
    resttime = Global->tracktime - Global->partitiontime -
               Global->treebuildtime - Global->forcecalctime;
    printf("RESTTIME = %12lu\t%5.2f\n",
           resttime,
           ((float)resttime)/Global->tracktime);
    MAIN_END;
}
|
||||
|
||||
/*
|
||||
* ANLINIT : initialize ANL macros
|
||||
*/
|
||||
void ANLinit()
|
||||
{
|
||||
MAIN_INITENV(,70000000,);
|
||||
/* Allocate global, shared memory */
|
||||
|
||||
Global = (struct GlobalMemory *) G_MALLOC(sizeof(struct GlobalMemory));
|
||||
if (Global==NULL) error("No initialization for Global\n");
|
||||
|
||||
BARINIT(Global->Barrier, NPROC);
|
||||
|
||||
LOCKINIT(Global->CountLock);
|
||||
LOCKINIT(Global->io_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* INIT_ROOT: Processor 0 reinitialize the global root at each time step
|
||||
*/
|
||||
void init_root()
|
||||
{
|
||||
long i;
|
||||
|
||||
Global->G_root=Local[0].ctab;
|
||||
Global->G_root->seqnum = 0;
|
||||
Type(Global->G_root) = CELL;
|
||||
Done(Global->G_root) = FALSE;
|
||||
Level(Global->G_root) = IMAX >> 1;
|
||||
for (i = 0; i < NSUB; i++) {
|
||||
Subp(Global->G_root)[i] = NULL;
|
||||
}
|
||||
Local[0].mynumcell=1;
|
||||
}
|
||||
|
||||
/*
 * LOG_BASE_2: return the exponent e such that number == 2^e.
 *
 * number must be an exact, positive power of two. Any other value is
 * reported on stderr and the program exits with status -1. Every power
 * of two that fits in a long is accepted (earlier the search stopped
 * after 2^19).
 */
long Log_base_2(long number)
{
    long out;
    long rest;

    /* A positive power of two has exactly one bit set. */
    if ((number > 0) && ((number & (number - 1)) == 0)) {
        out = 0;
        for (rest = number; rest > 1; rest >>= 1) {
            out++;
        }
        return(out);
    }

    fprintf(stderr,"Log_base_2: couldn't find log2 of %ld\n", number);
    exit(-1);
}
|
||||
|
||||
/*
|
||||
* TAB_INIT : allocate body and cell data space
|
||||
*/
|
||||
|
||||
void tab_init()
|
||||
{
|
||||
long i;
|
||||
|
||||
/*allocate leaf/cell space */
|
||||
maxleaf = (long) ((double) fleaves * nbody);
|
||||
maxcell = fcells * maxleaf;
|
||||
for (i = 0; i < NPROC; ++i) {
|
||||
Local[i].ctab = (cellptr) G_MALLOC((maxcell / NPROC) * sizeof(cell));
|
||||
Local[i].ltab = (leafptr) G_MALLOC((maxleaf / NPROC) * sizeof(leaf));
|
||||
}
|
||||
|
||||
/*allocate space for personal lists of body pointers */
|
||||
maxmybody = (nbody+maxleaf*MAX_BODIES_PER_LEAF)/NPROC;
|
||||
Local[0].mybodytab = (bodyptr*) G_MALLOC(NPROC*maxmybody*sizeof(bodyptr));
|
||||
/* space is allocated so that every */
|
||||
/* process can have a maximum of maxmybody pointers to bodies */
|
||||
/* then there is an array of bodies called bodytab which is */
|
||||
/* allocated in the distribution generation or when the distr. */
|
||||
/* file is read */
|
||||
maxmycell = maxcell / NPROC;
|
||||
maxmyleaf = maxleaf / NPROC;
|
||||
Local[0].mycelltab = (cellptr*) G_MALLOC(NPROC*maxmycell*sizeof(cellptr));
|
||||
Local[0].myleaftab = (leafptr*) G_MALLOC(NPROC*maxmyleaf*sizeof(leafptr));
|
||||
|
||||
CellLock = (struct CellLockType *) G_MALLOC(sizeof(struct CellLockType));
|
||||
ALOCKINIT(CellLock->CL,MAXLOCK);
|
||||
}
|
||||
|
||||
/*
|
||||
* SLAVESTART: main task for each processor
|
||||
*/
|
||||
void SlaveStart()
|
||||
{
|
||||
long ProcessId;
|
||||
|
||||
/* Get unique ProcessId */
|
||||
LOCK(Global->CountLock);
|
||||
ProcessId = Global->current_id++;
|
||||
UNLOCK(Global->CountLock);
|
||||
|
||||
BARINCLUDE(Global->Barrier);
|
||||
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
|
||||
processors to avoid migration */
|
||||
|
||||
/* initialize mybodytabs */
|
||||
Local[ProcessId].mybodytab = Local[0].mybodytab + (maxmybody * ProcessId);
|
||||
/* note that every process has its own copy */
|
||||
/* of mybodytab, which was initialized to the */
|
||||
/* beginning of the whole array by proc. 0 */
|
||||
/* before create */
|
||||
Local[ProcessId].mycelltab = Local[0].mycelltab + (maxmycell * ProcessId);
|
||||
Local[ProcessId].myleaftab = Local[0].myleaftab + (maxmyleaf * ProcessId);
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might distribute the
|
||||
data across physically distributed memories as desired.
|
||||
|
||||
One way to do this is as follows:
|
||||
|
||||
long i;
|
||||
|
||||
if (ProcessId == 0) {
|
||||
for (i=0;i<NPROC;i++) {
|
||||
Place all addresses x such that
|
||||
&(Local[i]) <= x < &(Local[i])+
|
||||
sizeof(struct local_memory) on node i
|
||||
Place all addresses x such that
|
||||
&(Local[i].mybodytab) <= x < &(Local[i].mybodytab)+
|
||||
maxmybody * sizeof(bodyptr) - 1 on node i
|
||||
Place all addresses x such that
|
||||
&(Local[i].mycelltab) <= x < &(Local[i].mycelltab)+
|
||||
maxmycell * sizeof(cellptr) - 1 on node i
|
||||
Place all addresses x such that
|
||||
&(Local[i].myleaftab) <= x < &(Local[i].myleaftab)+
|
||||
maxmyleaf * sizeof(leafptr) - 1 on node i
|
||||
}
|
||||
}
|
||||
|
||||
barrier(Global->Barstart,NPROC);
|
||||
|
||||
*/
|
||||
|
||||
Local[ProcessId].tout = Local[0].tout;
|
||||
Local[ProcessId].tnow = Local[0].tnow;
|
||||
Local[ProcessId].nstep = Local[0].nstep;
|
||||
|
||||
find_my_initial_bodies(bodytab, nbody, ProcessId);
|
||||
|
||||
/* main loop */
|
||||
while (Local[ProcessId].tnow < tstop + 0.1 * dtime) {
|
||||
stepsystem(ProcessId);
|
||||
// printtree(Global->G_root);
|
||||
// printf("Going to next step!!!\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* STARTRUN: startup hierarchical N-body code.
|
||||
*/
|
||||
|
||||
void startrun()
|
||||
{
|
||||
long seed;
|
||||
|
||||
infile = getparam("in");
|
||||
if (*infile != '\0'/*NULL*/) {
|
||||
inputdata();
|
||||
}
|
||||
else {
|
||||
nbody = getiparam("nbody");
|
||||
if (nbody < 1) {
|
||||
error("startrun: absurd nbody\n");
|
||||
}
|
||||
seed = getiparam("seed");
|
||||
}
|
||||
|
||||
outfile = getparam("out");
|
||||
dtime = getdparam("dtime");
|
||||
dthf = 0.5 * dtime;
|
||||
eps = getdparam("eps");
|
||||
epssq = eps*eps;
|
||||
tol = getdparam("tol");
|
||||
tolsq = tol*tol;
|
||||
fcells = getdparam("fcells");
|
||||
fleaves = getdparam("fleaves");
|
||||
tstop = getdparam("tstop");
|
||||
dtout = getdparam("dtout");
|
||||
NPROC = getiparam("NPROC");
|
||||
Local[0].nstep = 0;
|
||||
pranset(seed);
|
||||
testdata();
|
||||
ANLinit();
|
||||
setbound();
|
||||
Local[0].tout = Local[0].tnow + dtout;
|
||||
}
|
||||
|
||||
/*
|
||||
* TESTDATA: generate Plummer model initial conditions for test runs,
|
||||
* scaled to units such that M = -4E = G = 1 (Henon, Hegge, etc).
|
||||
* See Aarseth, SJ, Henon, M, & Wielen, R (1974) Astr & Ap, 37, 183.
|
||||
*/
|
||||
|
||||
#define MFRAC 0.999 /* mass cut off at MFRAC of total */
|
||||
|
||||
void testdata()
|
||||
{
|
||||
real rsc, vsc, r, v, x, y;
|
||||
vector cmr, cmv;
|
||||
register bodyptr p;
|
||||
long rejects = 0;
|
||||
long halfnbody, i;
|
||||
float offset;
|
||||
register bodyptr cp;
|
||||
|
||||
headline = "Hack code: Plummer model";
|
||||
Local[0].tnow = 0.0;
|
||||
bodytab = (bodyptr) G_MALLOC(nbody * sizeof(body));
|
||||
if (bodytab == NULL) {
|
||||
error("testdata: not enough memory\n");
|
||||
}
|
||||
rsc = 9 * PI / 16;
|
||||
vsc = sqrt(1.0 / rsc);
|
||||
|
||||
CLRV(cmr);
|
||||
CLRV(cmv);
|
||||
|
||||
halfnbody = nbody / 2;
|
||||
if (nbody % 2 != 0) halfnbody++;
|
||||
for (p = bodytab; p < bodytab+halfnbody; p++) {
|
||||
Type(p) = BODY;
|
||||
Mass(p) = 1.0 / nbody;
|
||||
Cost(p) = 1;
|
||||
|
||||
r = 1 / sqrt(pow(xrand(0.0, MFRAC), -2.0/3.0) - 1);
|
||||
/* reject radii greater than 10 */
|
||||
while (r > 9.0) {
|
||||
rejects++;
|
||||
r = 1 / sqrt(pow(xrand(0.0, MFRAC), -2.0/3.0) - 1);
|
||||
}
|
||||
pickshell(Pos(p), rsc * r);
|
||||
ADDV(cmr, cmr, Pos(p));
|
||||
do {
|
||||
x = xrand(0.0, 1.0);
|
||||
y = xrand(0.0, 0.1);
|
||||
|
||||
} while (y > x*x * pow(1 - x*x, 3.5));
|
||||
|
||||
v = sqrt(2.0) * x / pow(1 + r*r, 0.25);
|
||||
pickshell(Vel(p), vsc * v);
|
||||
ADDV(cmv, cmv, Vel(p));
|
||||
}
|
||||
|
||||
offset = 4.0;
|
||||
|
||||
for (p = bodytab + halfnbody; p < bodytab+nbody; p++) {
|
||||
Type(p) = BODY;
|
||||
Mass(p) = 1.0 / nbody;
|
||||
Cost(p) = 1;
|
||||
|
||||
cp = p - halfnbody;
|
||||
for (i = 0; i < NDIM; i++){
|
||||
Pos(p)[i] = Pos(cp)[i] + offset;
|
||||
Vel(p)[i] = Vel(cp)[i];
|
||||
}
|
||||
ADDV(cmr, cmr, Pos(p));
|
||||
ADDV(cmv, cmv, Vel(p));
|
||||
}
|
||||
|
||||
DIVVS(cmr, cmr, (real) nbody);
|
||||
DIVVS(cmv, cmv, (real) nbody);
|
||||
|
||||
for (p = bodytab; p < bodytab+nbody; p++) {
|
||||
SUBV(Pos(p), Pos(p), cmr);
|
||||
SUBV(Vel(p), Vel(p), cmv);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* PICKSHELL: pick a random point on a sphere of specified radius.
|
||||
*/
|
||||
|
||||
void pickshell(real vec[], real rad)
|
||||
{
|
||||
register long k;
|
||||
double rsq, rsc;
|
||||
|
||||
do {
|
||||
for (k = 0; k < NDIM; k++) {
|
||||
vec[k] = xrand(-1.0, 1.0);
|
||||
}
|
||||
DOTVP(rsq, vec, vec);
|
||||
} while (rsq > 1.0);
|
||||
|
||||
rsc = rad / sqrt(rsq);
|
||||
MULVS(vec, vec, rsc);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * INTPOW: integer power, i multiplied by itself j times.
 * Returns 1 when j is zero or negative.
 */
long intpow(long i, long j)
{
    long product = 1;
    long remaining = j;

    while (remaining > 0) {
        product *= i;
        remaining--;
    }
    return product;
}
|
||||
|
||||
|
||||
/*
|
||||
* STEPSYSTEM: advance N-body system one time-step.
|
||||
*/
|
||||
|
||||
void stepsystem(long ProcessId)
|
||||
{
|
||||
long i;
|
||||
real Cavg;
|
||||
bodyptr p,*pp;
|
||||
vector dvel, vel1, dpos;
|
||||
long trackstart, trackend;
|
||||
long partitionstart, partitionend;
|
||||
long treebuildstart, treebuildend;
|
||||
long forcecalcstart, forcecalcend;
|
||||
|
||||
if (Local[ProcessId].nstep == 2) {
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might reset the
|
||||
statistics that one is measuring about the parallel execution */
|
||||
}
|
||||
|
||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
||||
CLOCK(trackstart);
|
||||
}
|
||||
|
||||
if (ProcessId == 0) {
|
||||
init_root();
|
||||
}
|
||||
else {
|
||||
Local[ProcessId].mynumcell = 0;
|
||||
Local[ProcessId].mynumleaf = 0;
|
||||
}
|
||||
|
||||
|
||||
/* start at same time */
|
||||
BARRIER(Global->Barrier,NPROC);
|
||||
|
||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
||||
CLOCK(treebuildstart);
|
||||
}
|
||||
|
||||
/* load bodies into tree */
|
||||
maketree(ProcessId);
|
||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
||||
CLOCK(treebuildend);
|
||||
Global->treebuildtime += treebuildend - treebuildstart;
|
||||
}
|
||||
|
||||
Housekeep(ProcessId);
|
||||
|
||||
Cavg = (real) Cost(Global->G_root) / (real)NPROC ;
|
||||
Local[ProcessId].workMin = (long) (Cavg * ProcessId);
|
||||
Local[ProcessId].workMax = (long) (Cavg * (ProcessId + 1)
|
||||
+ (ProcessId == (NPROC - 1)));
|
||||
|
||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
||||
CLOCK(partitionstart);
|
||||
}
|
||||
|
||||
Local[ProcessId].mynbody = 0;
|
||||
find_my_bodies(Global->G_root, 0, BRC_FUC, ProcessId );
|
||||
|
||||
/* B*RRIER(Global->Barcom,NPROC); */
|
||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
||||
CLOCK(partitionend);
|
||||
Global->partitiontime += partitionend - partitionstart;
|
||||
}
|
||||
|
||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
||||
CLOCK(forcecalcstart);
|
||||
}
|
||||
|
||||
ComputeForces(ProcessId);
|
||||
|
||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
||||
CLOCK(forcecalcend);
|
||||
Global->forcecalctime += forcecalcend - forcecalcstart;
|
||||
}
|
||||
|
||||
/* advance my bodies */
|
||||
for (pp = Local[ProcessId].mybodytab;
|
||||
pp < Local[ProcessId].mybodytab+Local[ProcessId].mynbody; pp++) {
|
||||
p = *pp;
|
||||
MULVS(dvel, Acc(p), dthf);
|
||||
ADDV(vel1, Vel(p), dvel);
|
||||
MULVS(dpos, vel1, dtime);
|
||||
ADDV(Pos(p), Pos(p), dpos);
|
||||
ADDV(Vel(p), vel1, dvel);
|
||||
|
||||
for (i = 0; i < NDIM; i++) {
|
||||
if (Pos(p)[i]<Local[ProcessId].min[i]) {
|
||||
Local[ProcessId].min[i]=Pos(p)[i];
|
||||
}
|
||||
if (Pos(p)[i]>Local[ProcessId].max[i]) {
|
||||
Local[ProcessId].max[i]=Pos(p)[i] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
LOCK(Global->CountLock);
|
||||
for (i = 0; i < NDIM; i++) {
|
||||
if (Global->min[i] > Local[ProcessId].min[i]) {
|
||||
Global->min[i] = Local[ProcessId].min[i];
|
||||
}
|
||||
if (Global->max[i] < Local[ProcessId].max[i]) {
|
||||
Global->max[i] = Local[ProcessId].max[i];
|
||||
}
|
||||
}
|
||||
UNLOCK(Global->CountLock);
|
||||
|
||||
/* bar needed to make sure that every process has computed its min */
|
||||
/* and max coordinates, and has accumulated them into the global */
|
||||
/* min and max, before the new dimensions are computed */
|
||||
BARRIER(Global->Barrier,NPROC);
|
||||
|
||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
||||
CLOCK(trackend);
|
||||
Global->tracktime += trackend - trackstart;
|
||||
}
|
||||
if (ProcessId==0) {
|
||||
Global->rsize=0;
|
||||
SUBV(Global->max,Global->max,Global->min);
|
||||
for (i = 0; i < NDIM; i++) {
|
||||
if (Global->rsize < Global->max[i]) {
|
||||
Global->rsize = Global->max[i];
|
||||
}
|
||||
}
|
||||
ADDVS(Global->rmin,Global->min,-Global->rsize/100000.0);
|
||||
Global->rsize = 1.00002*Global->rsize;
|
||||
SETVS(Global->min,1E99);
|
||||
SETVS(Global->max,-1E99);
|
||||
}
|
||||
Local[ProcessId].nstep++;
|
||||
Local[ProcessId].tnow = Local[ProcessId].tnow + dtime;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void ComputeForces(long ProcessId)
|
||||
{
|
||||
bodyptr p,*pp;
|
||||
vector acc1, dacc, dvel;
|
||||
|
||||
for (pp = Local[ProcessId].mybodytab;
|
||||
pp < Local[ProcessId].mybodytab+Local[ProcessId].mynbody;pp++) {
|
||||
p = *pp;
|
||||
SETV(acc1, Acc(p));
|
||||
Cost(p)=0;
|
||||
hackgrav(p,ProcessId);
|
||||
Local[ProcessId].myn2bcalc += Local[ProcessId].myn2bterm;
|
||||
Local[ProcessId].mynbccalc += Local[ProcessId].mynbcterm;
|
||||
if (!Local[ProcessId].skipself) { /* did we miss self-int? */
|
||||
Local[ProcessId].myselfint++; /* count another goofup */
|
||||
}
|
||||
if (Local[ProcessId].nstep > 0) {
|
||||
/* use change in accel to make 2nd order correction to vel */
|
||||
SUBV(dacc, Acc(p), acc1);
|
||||
MULVS(dvel, dacc, dthf);
|
||||
ADDV(Vel(p), Vel(p), dvel);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* FIND_MY_INITIAL_BODIES: puts into mybodytab the initial list of bodies
|
||||
* assigned to the processor.
|
||||
*/
|
||||
|
||||
void find_my_initial_bodies(bodyptr btab, long nbody, long ProcessId)
|
||||
{
|
||||
long extra,offset,i;
|
||||
|
||||
Local[ProcessId].mynbody = nbody / NPROC;
|
||||
extra = nbody % NPROC;
|
||||
if (ProcessId < extra) {
|
||||
Local[ProcessId].mynbody++;
|
||||
offset = Local[ProcessId].mynbody * ProcessId;
|
||||
}
|
||||
if (ProcessId >= extra) {
|
||||
offset = (Local[ProcessId].mynbody+1) * extra + (ProcessId - extra)
|
||||
* Local[ProcessId].mynbody;
|
||||
}
|
||||
for (i=0; i < Local[ProcessId].mynbody; i++) {
|
||||
Local[ProcessId].mybodytab[i] = &(btab[offset+i]);
|
||||
}
|
||||
BARRIER(Global->Barrier,NPROC);
|
||||
}
|
||||
|
||||
|
||||
/*
 * FIND_MY_BODIES: collect into mybodytab the bodies whose accumulated
 * cost falls inside the [workMin, workMax) interval of the processor.
 *
 * mycell is the sub-tree to scan, work the cost accumulated before it,
 * and direction the row of the sequence tables that fixes the order in
 * which the children are visited.
 */
void find_my_bodies(nodeptr mycell, long work, long direction, long ProcessId)
{
    long i;
    leafptr lp;
    nodeptr child;

    if (Type(mycell) != LEAF) {
        /* interior cell: walk the children in table order until the */
        /* upper bound of the interval has been reached              */
        for (i = 0; (i < NSUB) && (work < (Local[ProcessId].workMax - .1)); i++) {
            child = Subp(mycell)[Child_Sequence[direction][i]];
            if (child == NULL) {
                continue;
            }
            /* descend only when the child reaches into the interval */
            if ((work+Cost(child)) >= (Local[ProcessId].workMin -.1)) {
                find_my_bodies(child, work, Direction_Sequence[direction][i],
                               ProcessId);
            }
            work += Cost(child);
        }
        return;
    }

    lp = (leafptr) mycell;
    for (i = 0; i < lp->num_bodies; i++) {
        if (work >= Local[ProcessId].workMin - .1) {
            if ((Local[ProcessId].mynbody+2) > maxmybody) {
                error("find_my_bodies: Processor %ld needs more than %ld bodies; increase fleaves\n", ProcessId, maxmybody);
            }
            Local[ProcessId].mybodytab[Local[ProcessId].mynbody++] =
                Bodyp(lp)[i];
        }
        work += Cost(Bodyp(lp)[i]);
        if (work >= Local[ProcessId].workMax-.1) {
            break;
        }
    }
}
|
||||
|
||||
/*
|
||||
* HOUSEKEEP: reinitialize the different variables (in particular global
|
||||
* variables) between each time step.
|
||||
*/
|
||||
|
||||
void Housekeep(long ProcessId)
|
||||
{
|
||||
Local[ProcessId].myn2bcalc = Local[ProcessId].mynbccalc
|
||||
= Local[ProcessId].myselfint = 0;
|
||||
SETVS(Local[ProcessId].min,1E99);
|
||||
SETVS(Local[ProcessId].max,-1E99);
|
||||
}
|
||||
|
||||
/*
|
||||
* SETBOUND: Compute the initial size of the root of the tree; only done
|
||||
* before first time step, and only processor 0 does it
|
||||
*/
|
||||
void setbound()
|
||||
{
|
||||
long i;
|
||||
real side ;
|
||||
bodyptr p;
|
||||
|
||||
SETVS(Local[0].min,1E99);
|
||||
SETVS(Local[0].max,-1E99);
|
||||
side=0;
|
||||
|
||||
for (p = bodytab; p < bodytab+nbody; p++) {
|
||||
for (i=0; i<NDIM;i++) {
|
||||
if (Pos(p)[i]<Local[0].min[i]) Local[0].min[i]=Pos(p)[i] ;
|
||||
if (Pos(p)[i]>Local[0].max[i]) Local[0].max[i]=Pos(p)[i] ;
|
||||
}
|
||||
}
|
||||
|
||||
SUBV(Local[0].max,Local[0].max,Local[0].min);
|
||||
for (i=0; i<NDIM;i++) if (side<Local[0].max[i]) side=Local[0].max[i];
|
||||
ADDVS(Global->rmin,Local[0].min,-side/100000.0);
|
||||
Global->rsize = 1.00002*side;
|
||||
SETVS(Global->max,-1E99);
|
||||
SETVS(Global->min,1E99);
|
||||
}
|
||||
|
||||
/*
 * HELP: print the description of the twelve run-time parameters and
 * their default values on standard output.
 */
void Help()
{
    static const char *const help_text[] = {
        "There are a total of twelve parameters, and all of them have default values.\n",
        "\n",
        "1) infile (char*) : The name of an input file that contains particle data. \n",
        " The format of the file is:\n",
        "\ta) An int representing the number of particles in the distribution\n",
        "\tb) An int representing the dimensionality of the problem (3-D)\n",
        "\tc) A double representing the current time of the simulation\n",
        "\td) Doubles representing the masses of all the particles\n",
        "\te) A vector (length equal to the dimensionality) of doubles\n",
        "\t representing the positions of all the particles\n",
        "\tf) A vector (length equal to the dimensionality) of doubles\n",
        "\t representing the velocities of all the particles\n",
        "\n",
        " Each of these numbers can be separated by any amount of whitespace.\n",
        "\n",
        "2) nbody (int) : If no input file is specified (the first line is blank), this\n",
        " number specifies the number of particles to generate under a plummer model.\n",
        " Default is 16384.\n",
        "\n",
        "3) seed (int) : The seed used by the random number generator.\n",
        " Default is 123.\n",
        "\n",
        "4) outfile (char*) : The name of the file that snapshots will be printed to. \n",
        " This feature has been disabled in the SPLASH release.\n",
        " Default is NULL.\n",
        "\n",
        "5) dtime (double) : The integration time-step.\n",
        " Default is 0.025.\n",
        "\n",
        "6) eps (double) : The usual potential softening\n",
        " Default is 0.05.\n",
        "\n",
        "7) tol (double) : The cell subdivision tolerance.\n",
        " Default is 1.0.\n",
        "\n",
        "8) fcells (double) : The total number of cells created is equal to \n",
        " fcells * number of leaves.\n",
        " Default is 2.0.\n",
        "\n",
        "9) fleaves (double) : The total number of leaves created is equal to \n",
        " fleaves * nbody.\n",
        " Default is 0.5.\n",
        "\n",
        "10) tstop (double) : The time to stop integration.\n",
        " Default is 0.075.\n",
        "\n",
        "11) dtout (double) : The data-output interval.\n",
        " Default is 0.25.\n",
        "\n",
        "12) NPROC (int) : The number of processors.\n",
        " Default is 1.\n",
    };
    unsigned long row;

    /* The text is emitted verbatim, one table entry after the other. */
    for (row = 0; row < sizeof(help_text) / sizeof(help_text[0]); row++) {
        printf("%s", help_text[row]);
    }
}
|
155
splash2/codes/apps/barnes/code.H
Normal file
155
splash2/codes/apps/barnes/code.H
Normal file
|
@ -0,0 +1,155 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/*
|
||||
* CODE.H: define various global things for CODE.C.
|
||||
*/
|
||||
|
||||
#ifndef _CODE_H_
|
||||
#define _CODE_H_
|
||||
|
||||
#define PAD_SIZE (PAGE_SIZE / (sizeof(long)))
|
||||
|
||||
/* Defined by the input file */
|
||||
global string headline; /* message describing calculation */
|
||||
global string infile; /* file name for snapshot input */
|
||||
global string outfile; /* file name for snapshot output */
|
||||
global real dtime; /* timestep for leapfrog integrator */
|
||||
global real dtout; /* time between data outputs */
|
||||
global real tstop; /* time to stop calculation */
|
||||
global long nbody; /* number of bodies in system */
|
||||
global real fcells; /* ratio of cells/leaves allocated */
|
||||
global real fleaves; /* ratio of leaves/bodies allocated */
|
||||
global real tol; /* accuracy parameter: 0.0 => exact */
|
||||
global real tolsq; /* square of previous */
|
||||
global real eps; /* potential softening parameter */
|
||||
global real epssq; /* square of previous */
|
||||
global real dthf; /* half time step */
|
||||
global long NPROC; /* Number of Processors */
|
||||
|
||||
global long maxcell; /* max number of cells allocated */
|
||||
global long maxleaf; /* max number of leaves allocated */
|
||||
global long maxmybody; /* max no. of bodies allocated per processor */
|
||||
global long maxmycell; /* max num. of cells to be allocated */
|
||||
global long maxmyleaf; /* max num. of leaves to be allocated */
|
||||
global bodyptr bodytab; /* array size is exactly nbody bodies */
|
||||
|
||||
global struct CellLockType {
|
||||
ALOCKDEC(CL,MAXLOCK) /* locks on the cells*/
|
||||
} *CellLock;
|
||||
|
||||
struct GlobalMemory { /* all this info is for the whole system */
|
||||
long n2bcalc; /* total number of body/cell interactions */
|
||||
long nbccalc; /* total number of body/body interactions */
|
||||
long selfint; /* number of self interactions */
|
||||
real mtot; /* total mass of N-body system */
|
||||
real etot[3]; /* binding, kinetic, potential energy */
|
||||
matrix keten; /* kinetic energy tensor */
|
||||
matrix peten; /* potential energy tensor */
|
||||
vector cmphase[2]; /* center of mass coordinates and velocity */
|
||||
vector amvec; /* angular momentum vector */
|
||||
cellptr G_root; /* root of the whole tree */
|
||||
vector rmin; /* lower-left corner of coordinate box */
|
||||
vector min; /* temporary lower-left corner of the box */
|
||||
vector max; /* temporary upper right corner of the box */
|
||||
real rsize; /* side-length of integer coordinate box */
|
||||
BARDEC(Barrier) /* barrier at the beginning of stepsystem */
|
||||
LOCKDEC(CountLock) /* Lock on the shared variables */
|
||||
LOCKDEC(NcellLock) /* Lock on the counter of array of cells for loadtree */
|
||||
LOCKDEC(NleafLock)/* Lock on the counter of array of leaves for loadtree */
|
||||
LOCKDEC(io_lock)
|
||||
unsigned long createstart,createend,computestart,computeend;
|
||||
unsigned long trackstart, trackend, tracktime;
|
||||
unsigned long partitionstart, partitionend, partitiontime;
|
||||
unsigned long treebuildstart, treebuildend, treebuildtime;
|
||||
unsigned long forcecalcstart, forcecalcend, forcecalctime;
|
||||
long current_id;
|
||||
volatile long k; /*for memory allocation in code.C */
|
||||
};
|
||||
global struct GlobalMemory *Global;
|
||||
|
||||
/* This structure is needed because under the sproc model there is no
|
||||
* per processor private address space.
|
||||
*/
|
||||
struct local_memory {
|
||||
/* Use padding so that each processor's variables are on their own page */
|
||||
long pad_begin[PAD_SIZE];
|
||||
|
||||
real tnow; /* current value of simulation time */
|
||||
real tout; /* time next output is due */
|
||||
long nstep; /* number of integration steps so far */
|
||||
|
||||
long workMin, workMax;/* interval of cost to be treated by a proc */
|
||||
|
||||
vector min, max; /* min and max of coordinates for each Proc. */
|
||||
|
||||
long mynumcell; /* num. of cells used for this proc in ctab */
|
||||
long mynumleaf; /* num. of leaves used for this proc in ctab */
|
||||
long mynbody; /* num bodies allocated to the processor */
|
||||
bodyptr* mybodytab; /* array of bodies allocated / processor */
|
||||
long myncell; /* num cells allocated to the processor */
|
||||
cellptr* mycelltab; /* array of cellptrs allocated to the processor */
|
||||
long mynleaf; /* number of leaves allocated to the processor */
|
||||
leafptr* myleaftab; /* array of leafptrs allocated to the processor */
|
||||
cellptr ctab; /* array of cells used for the tree. */
|
||||
leafptr ltab; /* array of cells used for the tree. */
|
||||
|
||||
long myn2bcalc; /* body-body force calculations for each processor */
|
||||
long mynbccalc; /* body-cell force calculations for each processor */
|
||||
long myselfint; /* count self-interactions for each processor */
|
||||
long myn2bterm; /* count body-body terms for a body */
|
||||
long mynbcterm; /* count body-cell terms for a body */
|
||||
bool skipself; /* true if self-interaction skipped OK */
|
||||
bodyptr pskip; /* body to skip in force evaluation */
|
||||
vector pos0; /* point at which to evaluate field */
|
||||
real phi0; /* computed potential at pos0 */
|
||||
vector acc0; /* computed acceleration at pos0 */
|
||||
vector dr; /* data to be shared */
|
||||
real drsq; /* between gravsub and subdivp */
|
||||
nodeptr pmem; /* remember particle data */
|
||||
|
||||
nodeptr Current_Root;
|
||||
long Root_Coords[NDIM];
|
||||
|
||||
real mymtot; /* total mass of N-body system */
|
||||
real myetot[3]; /* binding, kinetic, potential energy */
|
||||
matrix myketen; /* kinetic energy tensor */
|
||||
matrix mypeten; /* potential energy tensor */
|
||||
vector mycmphase[2]; /* center of mass coordinates */
|
||||
vector myamvec; /* angular momentum vector */
|
||||
|
||||
long pad_end[PAD_SIZE];
|
||||
};
|
||||
global struct local_memory Local[MAX_PROC];
|
||||
|
||||
void SlaveStart(void);
|
||||
void stepsystem(long ProcessId);
|
||||
void ComputeForces(long ProcessId);
|
||||
void Help(void);
|
||||
void ANLinit(void);
|
||||
void init_root(void);
|
||||
void tab_init(void);
|
||||
void startrun(void);
|
||||
void testdata(void);
|
||||
void pickshell(real vec[], real rad);
|
||||
void find_my_initial_bodies(bodyptr btab, long nbody, long ProcessId);
|
||||
void find_my_bodies(nodeptr mycell, long work, long direction, long ProcessId);
|
||||
void Housekeep(long ProcessId);
|
||||
void setbound(void);
|
||||
long Log_base_2(long number);
|
||||
long intpow(long i, long j);
|
||||
|
||||
#endif
|
235
splash2/codes/apps/barnes/code_io.C
Normal file
235
splash2/codes/apps/barnes/code_io.C
Normal file
|
@ -0,0 +1,235 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/*
|
||||
* CODE_IO.C:
|
||||
*/
|
||||
EXTERN_ENV
|
||||
#define global extern
|
||||
|
||||
#include "stdinc.h"
|
||||
|
||||
/*
|
||||
* INPUTDATA: read initial conditions from input file.
|
||||
*/
|
||||
|
||||
void inputdata ()
|
||||
{
|
||||
stream instr;
|
||||
permanent char headbuf[128];
|
||||
long ndim;
|
||||
real tnow;
|
||||
bodyptr p;
|
||||
long i;
|
||||
|
||||
fprintf(stderr,"reading input file : %s\n",infile);
|
||||
fflush(stderr);
|
||||
instr = fopen(infile, "r");
|
||||
if (instr == NULL)
|
||||
error("inputdata: cannot find file %s\n", infile);
|
||||
sprintf(headbuf, "Hack code: input file %s\n", infile);
|
||||
headline = headbuf;
|
||||
in_int(instr, &nbody);
|
||||
if (nbody < 1)
|
||||
error("inputdata: nbody = %ld is absurd\n", nbody);
|
||||
in_int(instr, &ndim);
|
||||
if (ndim != NDIM)
|
||||
error("inputdata: NDIM = %ld ndim = %ld is absurd\n", NDIM, ndim);
|
||||
in_real(instr, &tnow);
|
||||
for (i = 0; i < MAX_PROC; i++) {
|
||||
Local[i].tnow = tnow;
|
||||
}
|
||||
bodytab = (bodyptr) G_MALLOC(nbody * sizeof(body));
|
||||
if (bodytab == NULL)
|
||||
error("inputdata: not enuf memory\n");
|
||||
for (p = bodytab; p < bodytab+nbody; p++) {
|
||||
Type(p) = BODY;
|
||||
Cost(p) = 1;
|
||||
Phi(p) = 0.0;
|
||||
CLRV(Acc(p));
|
||||
}
|
||||
for (p = bodytab; p < bodytab+nbody; p++)
|
||||
in_real(instr, &Mass(p));
|
||||
for (p = bodytab; p < bodytab+nbody; p++)
|
||||
in_vector(instr, Pos(p));
|
||||
for (p = bodytab; p < bodytab+nbody; p++)
|
||||
in_vector(instr, Vel(p));
|
||||
fclose(instr);
|
||||
}
|
||||
|
||||
/*
|
||||
* INITOUTPUT: initialize output routines.
|
||||
*/
|
||||
|
||||
|
||||
void initoutput()
|
||||
{
|
||||
printf("\n\t\t%s\n\n", headline);
|
||||
printf("%10s%10s%10s%10s%10s%10s%10s%10s\n",
|
||||
"nbody", "dtime", "eps", "tol", "dtout", "tstop","fcells","NPROC");
|
||||
printf("%10ld%10.5f%10.4f%10.2f%10.3f%10.3f%10.2f%10ld\n\n",
|
||||
nbody, dtime, eps, tol, dtout, tstop, fcells, NPROC);
|
||||
}
|
||||
|
||||
/*
|
||||
* STOPOUTPUT: finish up after a run.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* OUTPUT: compute diagnostics and output data.
|
||||
*/
|
||||
|
||||
void output(long ProcessId)
|
||||
{
|
||||
long nttot, nbavg, ncavg,k;
|
||||
vector tempv1,tempv2;
|
||||
|
||||
if ((Local[ProcessId].tout - 0.01 * dtime) <= Local[ProcessId].tnow) {
|
||||
Local[ProcessId].tout += dtout;
|
||||
}
|
||||
|
||||
diagnostics(ProcessId);
|
||||
|
||||
if (Local[ProcessId].mymtot!=0) {
|
||||
LOCK(Global->CountLock);
|
||||
Global->n2bcalc += Local[ProcessId].myn2bcalc;
|
||||
Global->nbccalc += Local[ProcessId].mynbccalc;
|
||||
Global->selfint += Local[ProcessId].myselfint;
|
||||
ADDM(Global->keten, Global-> keten, Local[ProcessId].myketen);
|
||||
ADDM(Global->peten, Global-> peten, Local[ProcessId].mypeten);
|
||||
for (k=0;k<3;k++) Global->etot[k] += Local[ProcessId].myetot[k];
|
||||
ADDV(Global->amvec, Global-> amvec, Local[ProcessId].myamvec);
|
||||
|
||||
MULVS(tempv1, Global->cmphase[0],Global->mtot);
|
||||
MULVS(tempv2, Local[ProcessId].mycmphase[0], Local[ProcessId].mymtot);
|
||||
ADDV(tempv1, tempv1, tempv2);
|
||||
DIVVS(Global->cmphase[0], tempv1, Global->mtot+Local[ProcessId].mymtot);
|
||||
|
||||
MULVS(tempv1, Global->cmphase[1],Global->mtot);
|
||||
MULVS(tempv2, Local[ProcessId].mycmphase[1], Local[ProcessId].mymtot);
|
||||
ADDV(tempv1, tempv1, tempv2);
|
||||
DIVVS(Global->cmphase[1], tempv1, Global->mtot+Local[ProcessId].mymtot);
|
||||
Global->mtot +=Local[ProcessId].mymtot;
|
||||
UNLOCK(Global->CountLock);
|
||||
}
|
||||
|
||||
BARRIER(Global->Barrier,NPROC);
|
||||
|
||||
if (ProcessId==0) {
|
||||
nttot = Global->n2bcalc + Global->nbccalc;
|
||||
nbavg = (long) ((real) Global->n2bcalc / (real) nbody);
|
||||
ncavg = (long) ((real) Global->nbccalc / (real) nbody);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* DIAGNOSTICS: compute set of dynamical diagnostics.
|
||||
*/
|
||||
|
||||
void diagnostics(long ProcessId)
|
||||
{
|
||||
register bodyptr p,*pp;
|
||||
real velsq;
|
||||
vector tmpv;
|
||||
matrix tmpt;
|
||||
|
||||
Local[ProcessId].mymtot = 0.0;
|
||||
Local[ProcessId].myetot[1] = Local[ProcessId].myetot[2] = 0.0;
|
||||
CLRM(Local[ProcessId].myketen);
|
||||
CLRM(Local[ProcessId].mypeten);
|
||||
CLRV(Local[ProcessId].mycmphase[0]);
|
||||
CLRV(Local[ProcessId].mycmphase[1]);
|
||||
CLRV(Local[ProcessId].myamvec);
|
||||
for (pp = Local[ProcessId].mybodytab+Local[ProcessId].mynbody -1;
|
||||
pp >= Local[ProcessId].mybodytab; pp--) {
|
||||
p= *pp;
|
||||
Local[ProcessId].mymtot += Mass(p);
|
||||
DOTVP(velsq, Vel(p), Vel(p));
|
||||
Local[ProcessId].myetot[1] += 0.5 * Mass(p) * velsq;
|
||||
Local[ProcessId].myetot[2] += 0.5 * Mass(p) * Phi(p);
|
||||
MULVS(tmpv, Vel(p), 0.5 * Mass(p));
|
||||
OUTVP(tmpt, tmpv, Vel(p));
|
||||
ADDM(Local[ProcessId].myketen, Local[ProcessId].myketen, tmpt);
|
||||
MULVS(tmpv, Pos(p), Mass(p));
|
||||
OUTVP(tmpt, tmpv, Acc(p));
|
||||
ADDM(Local[ProcessId].mypeten, Local[ProcessId].mypeten, tmpt);
|
||||
MULVS(tmpv, Pos(p), Mass(p));
|
||||
ADDV(Local[ProcessId].mycmphase[0], Local[ProcessId].mycmphase[0], tmpv);
|
||||
MULVS(tmpv, Vel(p), Mass(p));
|
||||
ADDV(Local[ProcessId].mycmphase[1], Local[ProcessId].mycmphase[1], tmpv);
|
||||
CROSSVP(tmpv, Pos(p), Vel(p));
|
||||
MULVS(tmpv, tmpv, Mass(p));
|
||||
ADDV(Local[ProcessId].myamvec, Local[ProcessId].myamvec, tmpv);
|
||||
}
|
||||
Local[ProcessId].myetot[0] = Local[ProcessId].myetot[1]
|
||||
+ Local[ProcessId].myetot[2];
|
||||
if (Local[ProcessId].mymtot!=0){
|
||||
DIVVS(Local[ProcessId].mycmphase[0], Local[ProcessId].mycmphase[0],
|
||||
Local[ProcessId].mymtot);
|
||||
DIVVS(Local[ProcessId].mycmphase[1], Local[ProcessId].mycmphase[1],
|
||||
Local[ProcessId].mymtot);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Low-level input and output operations.
|
||||
*/
|
||||
|
||||
/* Read one decimal long from str into *iptr; report a failed conversion. */
void in_int(stream str, long *iptr)
{
   int nread = fscanf(str, "%ld", iptr);

   if (nread != 1) {
      error("in_int: input conversion print_error\n");
   }
}
|
||||
|
||||
/*
 * Read one floating-point value from str.  It is scanned as a double and
 * then converted on assignment to whatever type real is.
 */
void in_real(stream str, real *rptr)
{
   double scanned;
   int nread = fscanf(str, "%lf", &scanned);

   if (nread != 1) {
      error("in_real: input conversion print_error\n");
   }
   *rptr = scanned;
}
|
||||
|
||||
/* Read three floating-point components from str into vec[0..2]. */
void in_vector(stream str, vector vec)
{
   double cx, cy, cz;
   int nread = fscanf(str, "%lf%lf%lf", &cx, &cy, &cz);

   if (nread != 3) {
      error("in_vector: input conversion print_error\n");
   }
   vec[0] = cx;
   vec[1] = cy;
   vec[2] = cz;
}
|
||||
|
||||
/* Write ival to str on a line of its own, preceded by two spaces. */
void out_int(stream str, long ival)
{
   fprintf(str, "  %ld\n", ival);
}
|
||||
|
||||
/* Write rval to str in 21.14E notation on a line of its own. */
void out_real(stream str, real rval)
{
   fprintf(str, " %21.14E\n", rval);
}
|
||||
|
||||
/* Write the three components of vec to str on one line, 21.14E each. */
void out_vector(stream str, vector vec)
{
   /* first two components, then the third with the line terminator */
   fprintf(str, " %21.14E %21.14E", vec[0], vec[1]);
   fprintf(str, " %21.14E\n",vec[2]);
}
|
||||
|
15
splash2/codes/apps/barnes/code_io.H
Normal file
15
splash2/codes/apps/barnes/code_io.H
Normal file
|
@ -0,0 +1,15 @@
|
|||
#ifndef _CODE_IO_H_
|
||||
#define _CODE_IO_H_
|
||||
|
||||
void inputdata(void);
|
||||
void initoutput(void);
|
||||
void output(long ProcessId);
|
||||
void diagnostics(long ProcessId);
|
||||
void in_int(stream str, long *iptr);
|
||||
void in_real(stream str, real *rptr);
|
||||
void in_vector(stream str, vector vec);
|
||||
void out_int(stream str, long ival);
|
||||
void out_real(stream str, real rval);
|
||||
void out_vector(stream str, vector vec);
|
||||
|
||||
#endif
|
168
splash2/codes/apps/barnes/defs.H
Normal file
168
splash2/codes/apps/barnes/defs.H
Normal file
|
@ -0,0 +1,168 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef _DEFS_H_
|
||||
#define _DEFS_H_
|
||||
|
||||
//#include <assert.h>
|
||||
|
||||
#define MAX_PROC 128
|
||||
#define MAX_BODIES_PER_LEAF 10
|
||||
#define MAXLOCK 2048 /* maximum number of locks on DASH */
|
||||
#define PAGE_SIZE 4096 /* in bytes */
|
||||
|
||||
#define NSUB (1 << NDIM) /* subcells per cell */
|
||||
|
||||
/*
|
||||
* BODY and CELL data structures are used to represent the tree:
|
||||
*
|
||||
* +-----------------------------------------------------------+
|
||||
* root--> | CELL: mass, pos, cost, quad, /, o, /, /, /, /, o, /, done |
|
||||
* +---------------------------------|--------------|----------+
|
||||
* | |
|
||||
* +--------------------------------------+ |
|
||||
* | |
|
||||
* | +--------------------------------------+ |
|
||||
* +--> | BODY: mass, pos, cost, vel, acc, phi | |
|
||||
* +--------------------------------------+ |
|
||||
* |
|
||||
* +-----------------------------------------------------+
|
||||
* |
|
||||
* | +-----------------------------------------------------------+
|
||||
* +--> | CELL: mass, pos, cost, quad, o, /, /, o, /, /, o, /, done |
|
||||
* +------------------------------|--------|--------|----------+
|
||||
* etc etc etc
|
||||
*/
|
||||
|
||||
/*
|
||||
* NODE: data common to BODY and CELL structures.
|
||||
*/
|
||||
|
||||
typedef struct _node {
|
||||
long type; /* code for node type: body or cell */
|
||||
real mass; /* total mass of node */
|
||||
vector pos; /* position of node */
|
||||
long cost; /* number of interactions computed */
|
||||
long level;
|
||||
struct _node *parent; /* ptr to parent of this node in tree */
|
||||
long child_num; /* Index that this node should be put
|
||||
at in parent cell */
|
||||
} node;
|
||||
|
||||
typedef node* nodeptr;
|
||||
|
||||
#define Type(x) (((nodeptr) (x))->type)
|
||||
#define Mass(x) (((nodeptr) (x))->mass)
|
||||
#define Pos(x) (((nodeptr) (x))->pos)
|
||||
#define Cost(x) (((nodeptr) (x))->cost)
|
||||
#define Level(x) (((nodeptr) (x))->level)
|
||||
#define Parent(x) (((nodeptr) (x))->parent)
|
||||
#define ChildNum(x) (((nodeptr) (x))->child_num)
|
||||
|
||||
/*
|
||||
* BODY: data structure used to represent particles.
|
||||
*/
|
||||
|
||||
typedef struct _body* bodyptr;
|
||||
typedef struct _leaf* leafptr;
|
||||
typedef struct _cell* cellptr;
|
||||
|
||||
#define BODY 01 /* type code for bodies */
|
||||
|
||||
typedef struct _body {
|
||||
long type;
|
||||
real mass; /* mass of body */
|
||||
vector pos; /* position of body */
|
||||
long cost; /* number of interactions computed */
|
||||
long level;
|
||||
leafptr parent;
|
||||
long child_num; /* Index that this node should be put */
|
||||
vector vel; /* velocity of body */
|
||||
vector acc; /* acceleration of body */
|
||||
real phi; /* potential at body */
|
||||
} body;
|
||||
|
||||
#define Vel(x) (((bodyptr) (x))->vel)
|
||||
#define Acc(x) (((bodyptr) (x))->acc)
|
||||
#define Phi(x) (((bodyptr) (x))->phi)
|
||||
|
||||
/*
|
||||
* CELL: structure used to represent internal nodes of tree.
|
||||
*/
|
||||
|
||||
#define CELL 02 /* type code for cells */
|
||||
|
||||
typedef struct _cell {
|
||||
long type;
|
||||
real mass; /* total mass of cell */
|
||||
vector pos; /* cm. position of cell */
|
||||
long cost; /* number of interactions computed */
|
||||
long level;
|
||||
cellptr parent;
|
||||
long child_num; /* Index [0..8] that this node should be put */
|
||||
long processor; /* Used by partition code */
|
||||
struct _cell *next, *prev; /* Used in the partition array */
|
||||
long seqnum;
|
||||
#ifdef QUADPOLE
|
||||
matrix quad; /* quad. moment of cell */
|
||||
#endif
|
||||
volatile long done; /* flag to tell when the c.of.m is ready */
|
||||
nodeptr subp[NSUB]; /* descendents of cell */
|
||||
} cell;
|
||||
|
||||
#define Subp(x) (((cellptr) (x))->subp)
|
||||
|
||||
/*
|
||||
* LEAF: structure used to represent leaf nodes of tree.
|
||||
*/
|
||||
|
||||
#define LEAF 03 /* type code for leaves */
|
||||
|
||||
typedef struct _leaf {
|
||||
long type;
|
||||
real mass; /* total mass of leaf */
|
||||
vector pos; /* cm. position of leaf */
|
||||
long cost; /* number of interactions computed */
|
||||
long level;
|
||||
cellptr parent;
|
||||
long child_num; /* Index [0..8] that this node should be put */
|
||||
long processor; /* Used by partition code */
|
||||
struct _leaf *next, *prev; /* Used in the partition array */
|
||||
long seqnum;
|
||||
#ifdef QUADPOLE
|
||||
matrix quad; /* quad. moment of leaf */
|
||||
#endif
|
||||
volatile long done; /* flag to tell when the c.of.m is ready */
|
||||
long num_bodies;
|
||||
bodyptr bodyp[MAX_BODIES_PER_LEAF]; /* bodies of leaf */
|
||||
} leaf;
|
||||
|
||||
#define Bodyp(x) (((leafptr) (x))->bodyp)
|
||||
|
||||
#ifdef QUADPOLE
|
||||
#define Quad(x) (((cellptr) (x))->quad)
|
||||
#endif
|
||||
#define Done(x) (((cellptr) (x))->done)
|
||||
|
||||
/*
|
||||
* Integerized coordinates: used to maintain body-tree.
|
||||
*/
|
||||
|
||||
#define MAXLEVEL ((8L * (long)sizeof(long)) - 2L)
|
||||
#define IMAX (1L << MAXLEVEL) /* highest bit of int coord */
|
||||
|
||||
#endif
|
||||
|
157
splash2/codes/apps/barnes/getparam.C
Normal file
157
splash2/codes/apps/barnes/getparam.C
Normal file
|
@ -0,0 +1,157 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/*
|
||||
* GETPARAM.C:
|
||||
*/
|
||||
EXTERN_ENV
|
||||
#define global extern
|
||||
|
||||
#include "stdinc.h"
|
||||
|
||||
local string *defaults = NULL; /* vector of "name=value" strings */
|
||||
|
||||
/*
|
||||
* INITPARAM: ignore arg vector, remember defaults.
|
||||
*/
|
||||
|
||||
/*
 * Remember the vector of "name=value" defaults for later lookups by
 * getparam.  Only the pointer is stored; the vector is not copied.
 */
void initparam(string *defv)
{
   defaults = defv;
}
|
||||
|
||||
/*
|
||||
* GETPARAM: export version prompts user for value.
|
||||
*/
|
||||
|
||||
/*
 * Look up name in the defaults vector, then read one line from standard
 * input.  A non-empty line is returned as a freshly allocated copy (the
 * caller owns it); an empty line or end of input yields the default
 * value, which points into the defaults vector.
 *
 * Failures go through error, which is assumed not to return.
 */
string getparam(string name)
{
   long i, leng;
   int ch;
   string def, copy;
   char buf[128];

   if (defaults == NULL)
      error("getparam: called before initparam\n");
   i = scanbind(defaults, name);
   if (i < 0)
      error("getparam: %s unknown\n", name);
   def = extrvalue(defaults[i]);

   /* bounded read; end of input or a read failure counts as an empty answer */
   if (fgets(buf, sizeof buf, stdin) == NULL)
      buf[0] = '\0';
   leng = strlen(buf);
   if (leng > 0 && buf[leng - 1] == '\n') {
      buf[--leng] = '\0';               /* drop the line terminator */
   }
   else if (leng == (long) sizeof buf - 1) {
      /* line was longer than buf: discard the rest so that it is not
         mistaken for the answer to the next parameter */
      while ((ch = getc(stdin)) != '\n' && ch != EOF)
         ;
   }

   if (leng == 0) {
      return (def);
   }
   copy = malloc(leng + 1);
   if (copy == NULL)
      error("getparam: not enuf memory\n");
   return (strcpy(copy, buf));
}
|
||||
|
||||
/*
|
||||
* GETIPARAM, ..., GETDPARAM: get long, long, bool, or double parameters.
|
||||
*/
|
||||
|
||||
/* Fetch parameter name, asking again while the answer is empty; convert with atoi. */
long getiparam(string name)
{
   string val = "";

   while (*val == '\0') {
      val = getparam(name);
   }
   return (atoi(val));
}
|
||||
|
||||
/* Fetch parameter name, asking again while the answer is empty; convert with atol. */
long getlparam(string name)
{
   string val = "";

   while (*val == '\0') {
      val = getparam(name);
   }
   return (atol(val));
}
|
||||
|
||||
/*
 * Fetch parameter name as a boolean, asking again while the answer is
 * empty.  Only the first character is examined: one of tTyY1 means TRUE,
 * one of fFnN0 means FALSE, anything else is reported through error.
 */
bool getbparam(string name)
{
   string val;

   for (val = ""; *val == '\0'; )
      val = getparam(name);
   /* *val is never the terminator here, so strchr cannot match the
      terminator of the candidate string */
   if (strchr("tTyY1", *val) != NULL) {
      return (TRUE);
   }
   if (strchr("fFnN0", *val) != NULL) {
      return (FALSE);
   }
   error("getbparam: %s=%s not bool\n", name, val);
   /* error is expected not to return; give a defined result if it does
      rather than falling off the end of a non-void function */
   return (FALSE);
}
|
||||
|
||||
/* Fetch parameter name, asking again while the answer is empty; convert with atof. */
double getdparam(string name)
{
   string val = "";

   while (*val == '\0') {
      val = getparam(name);
   }
   return (atof(val));
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* SCANBIND: scan binding vector for name, return index.
|
||||
*/
|
||||
|
||||
/*
 * Search the NULL-terminated binding vector bvec for an entry whose
 * name part matches name.  Returns its position, or -1 when absent.
 */
long scanbind(string bvec[], string name)
{
   long slot = 0;

   while (bvec[slot] != NULL) {
      if (matchname(bvec[slot], name)) {
         return (slot);
      }
      slot++;
   }
   return (-1);
}
|
||||
|
||||
/*
|
||||
* MATCHNAME: determine if "name=value" matches "name".
|
||||
*/
|
||||
|
||||
/*
 * Return true when bind has the form "name=value" for exactly the
 * given name, that is: name is a prefix of bind and the character that
 * follows the prefix is an equals sign.
 */
bool matchname(string bind, string name)
{
   char *bp = bind;
   char *np = name;

   /*
    * Stop at the end of name as well as at the first difference.
    * Comparing characters alone would walk past both terminators when
    * the two strings are identical (a binding with no equals sign).
    */
   while (*np != '\0' && *bp == *np) {
      bp++;
      np++;
   }
   return (*bp == '=' && *np == '\0');
}
|
||||
|
||||
/*
|
||||
* EXTRVALUE: extract value from name=value string.
|
||||
*/
|
||||
|
||||
/*
 * Return a pointer to the text following the first equals sign in arg
 * (pointing into arg itself), or NULL when arg has no equals sign.
 */
string extrvalue(string arg)
{
   char *eq = strchr((char *) arg, '=');

   if (eq == NULL) {
      return (NULL);
   }
   return ((string) (eq + 1));
}
|
||||
|
14
splash2/codes/apps/barnes/getparam.H
Normal file
14
splash2/codes/apps/barnes/getparam.H
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef _GETPARAM_H_
|
||||
#define _GETPARAM_H_
|
||||
|
||||
void initparam(string *defv);
|
||||
string getparam(string name);
|
||||
long getiparam(string name);
|
||||
long getlparam(string name);
|
||||
bool getbparam(string name);
|
||||
double getdparam(string name);
|
||||
long scanbind(string bvec[], string name);
|
||||
bool matchname(string bind, string name);
|
||||
string extrvalue(string arg);
|
||||
|
||||
#endif
|
150
splash2/codes/apps/barnes/grav.C
Normal file
150
splash2/codes/apps/barnes/grav.C
Normal file
|
@ -0,0 +1,150 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/*
|
||||
* GRAV.C:
|
||||
*/
|
||||
|
||||
EXTERN_ENV
|
||||
#define global extern
|
||||
|
||||
#include "stdinc.h"
|
||||
|
||||
/*
|
||||
* HACKGRAV: evaluate grav field at a given particle.
|
||||
*/
|
||||
|
||||
/*
 * Evaluate the gravitational field at body p: reset this processor's
 * accumulators, walk the tree, then store the resulting potential,
 * acceleration and interaction cost back into p.
 */
void hackgrav(bodyptr p, long ProcessId)
{
   struct local_memory *me = &Local[ProcessId];

   /* set up the evaluation point and clear the accumulators */
   me->pskip = p;
   SETV(me->pos0, Pos(p));
   me->phi0 = 0.0;
   CLRV(me->acc0);
   me->myn2bterm = 0;
   me->mynbcterm = 0;
   me->skipself = FALSE;

   hackwalk(ProcessId);

   /* publish the results on the body */
   Phi(p) = me->phi0;
   SETV(Acc(p), me->acc0);
#ifdef QUADPOLE
   Cost(p) = me->myn2bterm + NDIM * me->mynbcterm;
#else
   Cost(p) = me->myn2bterm + me->mynbcterm;
#endif
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* GRAVSUB: compute a single body-body or body-cell interaction.
|
||||
*/
|
||||
|
||||
void gravsub(register nodeptr p, long ProcessId)
|
||||
{
|
||||
real drabs, phii, mor3;
|
||||
vector ai;
|
||||
|
||||
if (p != Local[ProcessId].pmem) {
|
||||
SUBV(Local[ProcessId].dr, Pos(p), Local[ProcessId].pos0);
|
||||
DOTVP(Local[ProcessId].drsq, Local[ProcessId].dr, Local[ProcessId].dr);
|
||||
}
|
||||
|
||||
Local[ProcessId].drsq += epssq;
|
||||
drabs = sqrt((double) Local[ProcessId].drsq);
|
||||
phii = Mass(p) / drabs;
|
||||
Local[ProcessId].phi0 -= phii;
|
||||
mor3 = phii / Local[ProcessId].drsq;
|
||||
MULVS(ai, Local[ProcessId].dr, mor3);
|
||||
ADDV(Local[ProcessId].acc0, Local[ProcessId].acc0, ai);
|
||||
if(Type(p) != BODY) { /* a body-cell/leaf interaction? */
|
||||
Local[ProcessId].mynbcterm++;
|
||||
#ifdef QUADPOLE
|
||||
dr5inv = 1.0/(Local[ProcessId].drsq * Local[ProcessId].drsq * drabs);
|
||||
MULMV(quaddr, Quad(p), Local[ProcessId].dr);
|
||||
DOTVP(drquaddr, Local[ProcessId].dr, quaddr);
|
||||
phiquad = -0.5 * dr5inv * drquaddr;
|
||||
Local[ProcessId].phi0 += phiquad;
|
||||
phiquad = 5.0 * phiquad / Local[ProcessId].drsq;
|
||||
MULVS(ai, Local[ProcessId].dr, phiquad);
|
||||
SUBV(Local[ProcessId].acc0, Local[ProcessId].acc0, ai);
|
||||
MULVS(quaddr, quaddr, dr5inv);
|
||||
SUBV(Local[ProcessId].acc0, Local[ProcessId].acc0, quaddr);
|
||||
#endif
|
||||
}
|
||||
else { /* a body-body interaction */
|
||||
Local[ProcessId].myn2bterm++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* HACKWALK: walk the tree opening cells too close to a given point.
|
||||
*/
|
||||
|
||||
void hackwalk(long ProcessId)
|
||||
{
|
||||
walksub(Global->G_root, Global->rsize * Global->rsize, ProcessId);
|
||||
}
|
||||
|
||||
/*
|
||||
* WALKSUB: recursive routine to do hackwalk operation.
|
||||
*/
|
||||
|
||||
/*
 * Recursive step of the tree walk.  When subdivp says node n need not
 * be opened, n is used as a single interaction.  Otherwise a cell is
 * descended into (children get a quarter of dsq) and a leaf has each of
 * its bodies applied individually, except the body being evaluated.
 */
void walksub(nodeptr n, real dsq, long ProcessId)
{
   nodeptr *kid;
   leafptr lf;
   bodyptr b;
   long k;

   if (!subdivp(n, dsq, ProcessId)) {
      gravsub(n, ProcessId);
      return;
   }

   if (Type(n) == CELL) {
      for (kid = Subp(n); kid < Subp(n) + NSUB; kid++) {
         if (*kid != NULL) {
            walksub(*kid, dsq / 4.0, ProcessId);
         }
      }
      return;
   }

   /* an opened leaf: interact with its bodies one by one */
   lf = (leafptr) n;
   for (k = 0; k < lf->num_bodies; k++) {
      b = Bodyp(lf)[k];
      if (b == Local[ProcessId].pskip) {
         Local[ProcessId].skipself = TRUE;     /* self-interaction skipped */
      }
      else {
         gravsub(b, ProcessId);
      }
   }
}
|
||||
|
||||
/*
|
||||
* SUBDIVP: decide if a node should be opened.
|
||||
* Side effects: sets pmem,dr, and drsq.
|
||||
*/
|
||||
|
||||
/*
 * Decide whether node p must be opened: true when tolsq times the
 * squared distance from pos0 to p is below dsq.
 * Side effects: leaves dr, drsq and pmem set for a following gravsub.
 */
bool subdivp(register nodeptr p, real dsq, long ProcessId)
{
   struct local_memory *me = &Local[ProcessId];

   SUBV(me->dr, Pos(p), me->pos0);
   DOTVP(me->drsq, me->dr, me->dr);
   me->pmem = p;
   return (tolsq * me->drsq < dsq);
}
|
||||
|
10
splash2/codes/apps/barnes/grav.H
Normal file
10
splash2/codes/apps/barnes/grav.H
Normal file
|
@ -0,0 +1,10 @@
|
|||
#ifndef _GRAV_H_
|
||||
#define _GRAV_H_
|
||||
|
||||
void hackgrav(bodyptr p, long ProcessId);
|
||||
void gravsub(register nodeptr p, long ProcessId);
|
||||
void hackwalk(long ProcessId);
|
||||
void walksub(nodeptr n, real dsq, long ProcessId);
|
||||
bool subdivp(register nodeptr p, real dsq, long ProcessId);
|
||||
|
||||
#endif
|
12
splash2/codes/apps/barnes/input
Normal file
12
splash2/codes/apps/barnes/input
Normal file
|
@ -0,0 +1,12 @@
|
|||
|
||||
16384
|
||||
123
|
||||
|
||||
0.025
|
||||
0.05
|
||||
1.0
|
||||
2.0
|
||||
5.0
|
||||
0.075
|
||||
0.25
|
||||
1
|
BIN
splash2/codes/apps/barnes/libpthread.a
Normal file
BIN
splash2/codes/apps/barnes/libpthread.a
Normal file
Binary file not shown.
517
splash2/codes/apps/barnes/load.C
Normal file
517
splash2/codes/apps/barnes/load.C
Normal file
|
@ -0,0 +1,517 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
EXTERN_ENV
|
||||
#define global extern
|
||||
|
||||
#include "stdinc.h"
|
||||
|
||||
/*
|
||||
* MAKETREE: initialize tree structure for hack force calculation.
|
||||
*/
|
||||
|
||||
void maketree(long ProcessId)
|
||||
{
|
||||
bodyptr p, *pp;
|
||||
|
||||
Local[ProcessId].myncell = 0;
|
||||
Local[ProcessId].mynleaf = 0;
|
||||
if (ProcessId == 0) {
|
||||
Local[ProcessId].mycelltab[Local[ProcessId].myncell++] = Global->G_root;
|
||||
}
|
||||
Local[ProcessId].Current_Root = (nodeptr) Global->G_root;
|
||||
for (pp = Local[ProcessId].mybodytab;
|
||||
pp < Local[ProcessId].mybodytab+Local[ProcessId].mynbody; pp++) {
|
||||
p = *pp;
|
||||
if (Mass(p) != 0.0) {
|
||||
Local[ProcessId].Current_Root
|
||||
= (nodeptr) loadtree(p, (cellptr) Local[ProcessId].Current_Root,
|
||||
ProcessId);
|
||||
}
|
||||
else {
|
||||
LOCK(Global->io_lock);
|
||||
fprintf(stderr, "Process %ld found body %ld to have zero mass\n",
|
||||
ProcessId, (long) p);
|
||||
UNLOCK(Global->io_lock);
|
||||
}
|
||||
}
|
||||
BARRIER(Global->Barrier,NPROC);
|
||||
hackcofm(ProcessId );
|
||||
BARRIER(Global->Barrier,NPROC);
|
||||
}
|
||||
|
||||
/*
 * Obtain a cell from makecell and initialise it as a child of parent.
 * Its level is half the level of the parent, or half of IMAX when there
 * is no parent.  The child slot is left at 0 for the caller to set.
 */
cellptr InitCell(cellptr parent, long ProcessId)
{
   cellptr fresh = makecell(ProcessId);

   fresh->processor = ProcessId;
   fresh->next = NULL;
   fresh->prev = NULL;
   Level(fresh) = (parent == NULL) ? (IMAX >> 1) : (Level(parent) >> 1);
   Parent(fresh) = (nodeptr) parent;
   ChildNum(fresh) = 0;
   return (fresh);
}
|
||||
|
||||
/*
 * Obtain a leaf from makeleaf and initialise it as a child of parent.
 * Its level is half the level of the parent, or half of IMAX when there
 * is no parent.  The child slot is left at 0 for the caller to set.
 */
leafptr InitLeaf(cellptr parent, long ProcessId)
{
   leafptr fresh = makeleaf(ProcessId);

   fresh->processor = ProcessId;
   fresh->next = NULL;
   fresh->prev = NULL;
   Level(fresh) = (parent == NULL) ? (IMAX >> 1) : (Level(parent) >> 1);
   Parent(fresh) = (nodeptr) parent;
   ChildNum(fresh) = 0;
   return (fresh);
}
|
||||
|
||||
/*
 * Debug dump of the subtree rooted at n to standard output.  A cell
 * prints a summary line, one line per child slot, and then recurses
 * into every non-empty child.  A leaf prints a summary line and one
 * line per body.  Any other node type is fatal.
 */
void printtree(nodeptr n)
{
   long k;
   cellptr c;
   leafptr l;
   bodyptr p;
   nodeptr kid;
   long nseq;

   if (Type(n) == CELL) {
      c = (cellptr) n;
      nseq = c->seqnum;
      printf("Cell : Cost = %ld, ", Cost(c));
      PRTV("Pos", Pos(n));
      printf("\n");

      /* one summary line per child slot */
      for (k = 0; k < NSUB; k++) {
         kid = Subp(c)[k];
         printf("Child #%ld: ", k);
         if (kid == NULL) {
            printf("NONE");
         }
         else {
            if (Type(kid) == CELL) {
               nseq = ((cellptr) kid)->seqnum;
               printf("C: Cost = %ld, ", Cost(kid));
            }
            else {
               nseq = ((leafptr) kid)->seqnum;
               printf("L: # Bodies = %2ld, Cost = %ld, ",
                      ((leafptr) kid)->num_bodies, Cost(kid));
            }
            PRTV("Pos", Pos(kid));
         }
         printf("\n");
      }

      /* then the full dump of each non-empty child */
      for (k = 0; k < NSUB; k++) {
         if (Subp(c)[k] != NULL) {
            printtree(Subp(c)[k]);
         }
      }
   }
   else if (Type(n) == LEAF) {
      l = (leafptr) n;
      nseq = l->seqnum;
      printf("Leaf : # Bodies = %2ld, Cost = %ld, ", l->num_bodies, Cost(l));
      PRTV("Pos", Pos(n));
      printf("\n");
      for (k = 0; k < l->num_bodies; k++) {
         p = Bodyp(l)[k];
         printf("Body #%2ld: Num = %2ld, Level = %ld, ",
                p - bodytab, k, Level(p));
         PRTV("Pos",Pos(p));
         printf("\n");
      }
   }
   else {
      fprintf(stderr, "Bad type\n");
      exit(-1);
   }
   fflush(stdout);
}
|
||||
|
||||
/*
|
||||
* LOADTREE: descend tree and insert particle.
|
||||
*/
|
||||
|
||||
nodeptr loadtree(bodyptr p, cellptr root, long ProcessId)
|
||||
{
|
||||
long l, xp[NDIM], xor[NDIM], flag;
|
||||
long i, j, root_level;
|
||||
bool valid_root;
|
||||
long kidIndex;
|
||||
volatile nodeptr *volatile qptr, mynode;
|
||||
leafptr le;
|
||||
|
||||
intcoord(xp, Pos(p));
|
||||
valid_root = TRUE;
|
||||
for (i = 0; i < NDIM; i++) {
|
||||
xor[i] = xp[i] ^ Local[ProcessId].Root_Coords[i];
|
||||
}
|
||||
for (i = IMAX >> 1; i > Level(root); i >>= 1) {
|
||||
for (j = 0; j < NDIM; j++) {
|
||||
if (xor[j] & i) {
|
||||
valid_root = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!valid_root) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!valid_root) {
|
||||
if (root != Global->G_root) {
|
||||
root_level = Level(root);
|
||||
for (j = i; j > root_level; j >>= 1) {
|
||||
root = (cellptr) Parent(root);
|
||||
}
|
||||
valid_root = TRUE;
|
||||
for (i = IMAX >> 1; i > Level(root); i >>= 1) {
|
||||
for (j = 0; j < NDIM; j++) {
|
||||
if (xor[j] & i) {
|
||||
valid_root = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!valid_root) {
|
||||
printf("P%ld body %ld\n", ProcessId, p - bodytab);
|
||||
root = Global->G_root;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
root = Global->G_root;
|
||||
mynode = (nodeptr) root;
|
||||
kidIndex = subindex(xp, Level(mynode));
|
||||
qptr = &Subp(mynode)[kidIndex];
|
||||
|
||||
l = Level(mynode) >> 1;
|
||||
flag = TRUE;
|
||||
while (flag) { /* loop descending tree */
|
||||
if (l == 0) {
|
||||
error("not enough levels in tree\n");
|
||||
}
|
||||
if (*qptr == NULL) {
|
||||
/* lock the parent cell */
|
||||
ALOCK(CellLock->CL, ((cellptr) mynode)->seqnum % MAXLOCK);
|
||||
if (*qptr == NULL) {
|
||||
le = InitLeaf((cellptr) mynode, ProcessId);
|
||||
Parent(p) = (nodeptr) le;
|
||||
Level(p) = l;
|
||||
ChildNum(p) = le->num_bodies;
|
||||
ChildNum(le) = kidIndex;
|
||||
Bodyp(le)[le->num_bodies++] = p;
|
||||
*qptr = (nodeptr) le;
|
||||
flag = FALSE;
|
||||
}
|
||||
AULOCK(CellLock->CL, ((cellptr) mynode)->seqnum % MAXLOCK);
|
||||
/* unlock the parent cell */
|
||||
}
|
||||
if (flag && *qptr && (Type(*qptr) == LEAF)) {
|
||||
/* reached a "leaf"? */
|
||||
ALOCK(CellLock->CL, ((cellptr) mynode)->seqnum % MAXLOCK);
|
||||
/* lock the parent cell */
|
||||
if (Type(*qptr) == LEAF) { /* still a "leaf"? */
|
||||
le = (leafptr) *qptr;
|
||||
if (le->num_bodies == MAX_BODIES_PER_LEAF) {
|
||||
*qptr = (nodeptr) SubdivideLeaf(le, (cellptr) mynode, l,
|
||||
ProcessId);
|
||||
}
|
||||
else {
|
||||
Parent(p) = (nodeptr) le;
|
||||
Level(p) = l;
|
||||
ChildNum(p) = le->num_bodies;
|
||||
Bodyp(le)[le->num_bodies++] = p;
|
||||
flag = FALSE;
|
||||
}
|
||||
}
|
||||
AULOCK(CellLock->CL, ((cellptr) mynode)->seqnum % MAXLOCK);
|
||||
/* unlock the node */
|
||||
}
|
||||
if (flag) {
|
||||
mynode = *qptr;
|
||||
kidIndex = subindex(xp, l);
|
||||
qptr = &Subp(*qptr)[kidIndex]; /* move down one level */
|
||||
l = l >> 1; /* and test next bit */
|
||||
}
|
||||
}
|
||||
SETV(Local[ProcessId].Root_Coords, xp);
|
||||
return Parent((leafptr) *qptr);
|
||||
}
|
||||
|
||||
|
||||
/* * INTCOORD: compute integerized coordinates. * Returns: TRUE
|
||||
unless rp was out of bounds. */
|
||||
|
||||
/* integerized coordinate vector [0,IMAX) */
|
||||
/* real coordinate vector (system coords) */
|
||||
bool intcoord(long xp[NDIM], vector rp)
|
||||
{
|
||||
long k;
|
||||
bool inb;
|
||||
double xsc;
|
||||
|
||||
inb = TRUE;
|
||||
for (k = 0; k < NDIM; k++) {
|
||||
xsc = (rp[k] - Global->rmin[k]) / Global->rsize;
|
||||
if (0.0 <= xsc && xsc < 1.0) {
|
||||
xp[k] = floor(IMAX * xsc);
|
||||
}
|
||||
else {
|
||||
inb = FALSE;
|
||||
}
|
||||
}
|
||||
return (inb);
|
||||
}
|
||||
|
||||
/*
|
||||
* SUBINDEX: determine which subcell to select.
|
||||
*/
|
||||
|
||||
/* integerized coordinates of particle */
|
||||
/* current level of tree */
|
||||
long subindex(long x[NDIM], long l)
|
||||
{
|
||||
long i, k;
|
||||
long yes;
|
||||
|
||||
i = 0;
|
||||
yes = FALSE;
|
||||
if (x[0] & l) {
|
||||
i += NSUB >> 1;
|
||||
yes = TRUE;
|
||||
}
|
||||
for (k = 1; k < NDIM; k++) {
|
||||
if (((x[k] & l) && !yes) || (!(x[k] & l) && yes)) {
|
||||
i += NSUB >> (k + 1);
|
||||
yes = TRUE;
|
||||
}
|
||||
else yes = FALSE;
|
||||
}
|
||||
|
||||
return (i);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* HACKCOFM: descend tree finding center-of-mass coordinates.
|
||||
*/
|
||||
|
||||
void hackcofm(long ProcessId)
|
||||
{
|
||||
long i;
|
||||
nodeptr r;
|
||||
leafptr l;
|
||||
leafptr* ll;
|
||||
bodyptr p;
|
||||
cellptr q;
|
||||
cellptr *cc;
|
||||
vector tmpv;
|
||||
|
||||
/* get a cell using get*sub. Cells are got in reverse of the order in */
|
||||
/* the cell array; i.e. reverse of the order in which they were created */
|
||||
/* this way, we look at child cells before parents */
|
||||
|
||||
for (ll = Local[ProcessId].myleaftab + Local[ProcessId].mynleaf - 1;
|
||||
ll >= Local[ProcessId].myleaftab; ll--) {
|
||||
l = *ll;
|
||||
Mass(l) = 0.0;
|
||||
Cost(l) = 0;
|
||||
CLRV(Pos(l));
|
||||
for (i = 0; i < l->num_bodies; i++) {
|
||||
p = Bodyp(l)[i];
|
||||
Mass(l) += Mass(p);
|
||||
Cost(l) += Cost(p);
|
||||
MULVS(tmpv, Pos(p), Mass(p));
|
||||
ADDV(Pos(l), Pos(l), tmpv);
|
||||
}
|
||||
DIVVS(Pos(l), Pos(l), Mass(l));
|
||||
#ifdef QUADPOLE
|
||||
CLRM(Quad(l));
|
||||
for (i = 0; i < l->num_bodies; i++) {
|
||||
p = Bodyp(l)[i];
|
||||
SUBV(dr, Pos(p), Pos(l));
|
||||
OUTVP(drdr, dr, dr);
|
||||
DOTVP(drsq, dr, dr);
|
||||
SETMI(Idrsq);
|
||||
MULMS(Idrsq, Idrsq, drsq);
|
||||
MULMS(tmpm, drdr, 3.0);
|
||||
SUBM(tmpm, tmpm, Idrsq);
|
||||
MULMS(tmpm, tmpm, Mass(p));
|
||||
ADDM(Quad(l), Quad(l), tmpm);
|
||||
}
|
||||
#endif
|
||||
Done(l)=TRUE;
|
||||
}
|
||||
for (cc = Local[ProcessId].mycelltab+Local[ProcessId].myncell-1;
|
||||
cc >= Local[ProcessId].mycelltab; cc--) {
|
||||
q = *cc;
|
||||
Mass(q) = 0.0;
|
||||
Cost(q) = 0;
|
||||
CLRV(Pos(q));
|
||||
for (i = 0; i < NSUB; i++) {
|
||||
r = Subp(q)[i];
|
||||
if (r != NULL) {
|
||||
while(!Done(r)) {
|
||||
/* wait */
|
||||
}
|
||||
Mass(q) += Mass(r);
|
||||
Cost(q) += Cost(r);
|
||||
MULVS(tmpv, Pos(r), Mass(r));
|
||||
ADDV(Pos(q), Pos(q), tmpv);
|
||||
Done(r) = FALSE;
|
||||
}
|
||||
}
|
||||
DIVVS(Pos(q), Pos(q), Mass(q));
|
||||
#ifdef QUADPOLE
|
||||
CLRM(Quad(q));
|
||||
for (i = 0; i < NSUB; i++) {
|
||||
r = Subp(q)[i];
|
||||
if (r != NULL) {
|
||||
SUBV(dr, Pos(r), Pos(q));
|
||||
OUTVP(drdr, dr, dr);
|
||||
DOTVP(drsq, dr, dr);
|
||||
SETMI(Idrsq);
|
||||
MULMS(Idrsq, Idrsq, drsq);
|
||||
MULMS(tmpm, drdr, 3.0);
|
||||
SUBM(tmpm, tmpm, Idrsq);
|
||||
MULMS(tmpm, tmpm, Mass(r));
|
||||
ADDM(tmpm, tmpm, Quad(r));
|
||||
ADDM(Quad(q), Quad(q), tmpm);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
Done(q)=TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * SUBDIVIDELEAF: replace leaf le by a new cell under parent and spread
 * the bodies of le over the children of that cell.
 *
 *   le         leaf to split; it is emptied and reused as the child that
 *              holds its first body
 *   parent     cell that currently holds le
 *   l          level used to pick the child slot of each body
 *   ProcessId  processor doing the split
 *
 * Returns the new cell. The caller is expected to store it in the slot
 * that used to hold le.
 */
cellptr SubdivideLeaf(leafptr le, cellptr parent, long l, long ProcessId)
{
   bodyptr saved[MAX_BODIES_PER_LEAF];
   long xp[NDIM];
   long count, i, slot;
   long sublevel = l >> 1;
   cellptr cell;
   leafptr dst;
   bodyptr b;

   /* move the bodies to a temp array so the leaf itself can be reused */
   count = le->num_bodies;
   for (i = 0; i < count; i++) {
      saved[i] = Bodyp(le)[i];
      Bodyp(le)[i] = NULL;
   }
   le->num_bodies = 0;

   /* the new cell takes over the child slot number of the old leaf */
   cell = InitCell(parent, ProcessId);
   ChildNum(cell) = ChildNum(le);

   /* first body: hang the reused leaf below the new cell */
   b = saved[0];
   intcoord(xp, Pos(b));
   slot = subindex(xp, l);
   Subp(cell)[slot] = (nodeptr) le;
   ChildNum(le) = slot;
   Parent(le) = (nodeptr) cell;
   Level(le) = sublevel;
   Parent(b) = (nodeptr) le;
   ChildNum(b) = le->num_bodies;
   Level(b) = sublevel;
   Bodyp(le)[le->num_bodies++] = b;

   /* remaining bodies: create a leaf for a slot the first time it is hit */
   for (i = 1; i < count; i++) {
      b = saved[i];
      intcoord(xp, Pos(b));
      slot = subindex(xp, l);
      dst = (leafptr) Subp(cell)[slot];
      if (dst == NULL) {
         dst = InitLeaf(cell, ProcessId);
         ChildNum(dst) = slot;
         Subp(cell)[slot] = (nodeptr) dst;
      }
      Parent(b) = (nodeptr) dst;
      ChildNum(b) = dst->num_bodies;
      Level(b) = sublevel;
      Bodyp(dst)[dst->num_bodies++] = b;
   }
   return cell;
}
|
||||
|
||||
/*
|
||||
* MAKECELL: allocation routine for cells.
|
||||
*/
|
||||
|
||||
cellptr makecell(long ProcessId)
|
||||
{
|
||||
cellptr c;
|
||||
long i, Mycell;
|
||||
|
||||
if (Local[ProcessId].mynumcell == maxmycell) {
|
||||
error("makecell: Proc %ld needs more than %ld cells; increase fcells\n",
|
||||
ProcessId,maxmycell);
|
||||
}
|
||||
Mycell = Local[ProcessId].mynumcell++;
|
||||
c = Local[ProcessId].ctab + Mycell;
|
||||
c->seqnum = ProcessId*maxmycell+Mycell;
|
||||
Type(c) = CELL;
|
||||
Done(c) = FALSE;
|
||||
Mass(c) = 0.0;
|
||||
for (i = 0; i < NSUB; i++) {
|
||||
Subp(c)[i] = NULL;
|
||||
}
|
||||
Local[ProcessId].mycelltab[Local[ProcessId].myncell++] = c;
|
||||
return (c);
|
||||
}
|
||||
|
||||
/*
|
||||
* MAKELEAF: allocation routine for leaves.
|
||||
*/
|
||||
|
||||
leafptr makeleaf(long ProcessId)
|
||||
{
|
||||
leafptr le;
|
||||
long i, Myleaf;
|
||||
|
||||
if (Local[ProcessId].mynumleaf == maxmyleaf) {
|
||||
error("makeleaf: Proc %ld needs more than %ld leaves; increase fleaves\n",
|
||||
ProcessId,maxmyleaf);
|
||||
}
|
||||
Myleaf = Local[ProcessId].mynumleaf++;
|
||||
le = Local[ProcessId].ltab + Myleaf;
|
||||
le->seqnum = ProcessId * maxmyleaf + Myleaf;
|
||||
Type(le) = LEAF;
|
||||
Done(le) = FALSE;
|
||||
Mass(le) = 0.0;
|
||||
le->num_bodies = 0;
|
||||
for (i = 0; i < MAX_BODIES_PER_LEAF; i++) {
|
||||
Bodyp(le)[i] = NULL;
|
||||
}
|
||||
Local[ProcessId].myleaftab[Local[ProcessId].mynleaf++] = le;
|
||||
return (le);
|
||||
}
|
||||
|
||||
|
17
splash2/codes/apps/barnes/load.H
Normal file
17
splash2/codes/apps/barnes/load.H
Normal file
|
@ -0,0 +1,17 @@
|
|||
#ifndef _LOAD_H_
|
||||
#define _LOAD_H_
|
||||
|
||||
void maketree(long ProcessId);
|
||||
cellptr InitCell(cellptr parent, long ProcessId);
|
||||
leafptr InitLeaf(cellptr parent, long ProcessId);
|
||||
void printtree(nodeptr n);
|
||||
nodeptr loadtree(bodyptr p, cellptr root, long ProcessId);
|
||||
bool intcoord(long xp[NDIM], vector rp);
|
||||
long subindex(long x[NDIM], long l);
|
||||
void hackcofm(long ProcessId);
|
||||
cellptr SubdivideLeaf(leafptr le, cellptr parent, long l, long ProcessId);
|
||||
cellptr makecell(long ProcessId);
|
||||
leafptr makeleaf(long ProcessId);
|
||||
|
||||
|
||||
#endif
|
BIN
splash2/codes/apps/barnes/m5op_x86.o
Normal file
BIN
splash2/codes/apps/barnes/m5op_x86.o
Normal file
Binary file not shown.
119
splash2/codes/apps/barnes/stdinc.H
Normal file
119
splash2/codes/apps/barnes/stdinc.H
Normal file
|
@ -0,0 +1,119 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/*
|
||||
* STDINC.H: standard include file for C programs.
|
||||
*/
|
||||
|
||||
#ifndef _STDINC_H_
|
||||
#define _STDINC_H_
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <sys/times.h>
|
||||
|
||||
/*
 * ERROR: report a fatal condition and terminate the program.
 *
 * Takes the same arguments as printf. The message goes to stderr and the
 * process exits with a failure status, so code following a call is never
 * reached. The expansion is a single statement without a trailing
 * semicolon, which makes a call safe as the body of an if that has an
 * else branch.
 */
#define error(msg, ...) \
   do { \
      fprintf(stderr, msg, ##__VA_ARGS__); \
      exit(-1); \
   } while (0)
|
||||
/*
 * STREAM: a replacement for FILE *.
 */
typedef FILE *stream;

/*
 * BOOL, TRUE and FALSE: standard names for logical values.
 * TRUE and FALSE are only defined here when TRUE is not defined already.
 */
typedef long bool;

#ifndef TRUE
#define FALSE 0
#define TRUE  1
#endif

/*
 * BYTE: a short name for a handy chunk of bits.
 */
typedef char byte;

/*
 * STRING: for null-terminated strings which are not taken apart.
 */
typedef char *string;

/*
 * REAL: default type is double; REALPTR points to one.
 */
typedef double real;
typedef real *realptr;

/*
 * PROC, IPROC, RPROC: pointers to procedures, integer functions, and
 * real-valued functions, respectively.
 */
typedef void (*proced)();
typedef long (*iproc)();
typedef real (*rproc)();

/*
 * LOCAL: declare something to be local to a file.
 * PERMANENT: declare something to be permanent data within a function.
 * Both expand to the static storage class.
 */
#define local     static
#define permanent static

/*
 * STREQ: handy string-equality macro; true when strcmp reports equality.
 */
#define streq(x,y) (strcmp((x), (y)) == 0)

/*
 * PI, etc. -- mathematical constants
 */
#define PI         3.14159265358979323846
#define TWO_PI     6.28318530717958647693
#define FOUR_PI   12.56637061435917295385
#define HALF_PI    1.57079632679489661923
#define FRTHRD_PI  4.18879020478639098462

/*
 * ABS: returns the absolute value of its argument. The argument is
 * evaluated twice, so it must not have side effects.
 * (No MAX or MIN macro is defined in this section.)
 */
#define ABS(x) (((x) < 0) ? -(x) : (x))
|
||||
|
||||
#include "vectmath.h"
|
||||
#include "defs.h"
|
||||
#include "code.h"
|
||||
#include "util.h"
|
||||
#include "load.h"
|
||||
#include "code_io.h"
|
||||
#include "grav.h"
|
||||
#include "getparam.h"
|
||||
|
||||
#endif
|
71
splash2/codes/apps/barnes/util.C
Normal file
71
splash2/codes/apps/barnes/util.C
Normal file
|
@ -0,0 +1,71 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
EXTERN_ENV
|
||||
#define global extern
|
||||
|
||||
#include "stdinc.h"
|
||||
|
||||
#define HZ 60.0
|
||||
#define MULT 1103515245
|
||||
#define ADD 12345
|
||||
#define MASK (0x7FFFFFFF)
|
||||
#define TWOTO31 2147483648.0
|
||||
|
||||
local long A = 1;
|
||||
local long B = 0;
|
||||
local long randx = 1;
|
||||
local long lastrand; /* the last random number */
|
||||
|
||||
/*
 * XRAND: generate floating-point random number.
 *
 * Scales one value of prand() from [0, 1.0) onto the range that starts
 * at xl and has width xh - xl.
 */
double xrand(double xl, double xh)
{
   double width = xh - xl;

   return (xl + width * prand());
}
|
||||
|
||||
void pranset(long seed)
|
||||
{
|
||||
A = 1;
|
||||
B = 0;
|
||||
randx = (A*seed+B) & MASK;
|
||||
A = (MULT * A) & MASK;
|
||||
B = (MULT*B + ADD) & MASK;
|
||||
}
|
||||
|
||||
double prand()
|
||||
/*
|
||||
Return a random double in [0, 1.0)
|
||||
*/
|
||||
{
|
||||
lastrand = randx;
|
||||
randx = (A*randx+B) & MASK;
|
||||
return((double)lastrand/TWOTO31);
|
||||
}
|
||||
|
||||
/*
|
||||
* CPUTIME: compute CPU time in min.
|
||||
*/
|
||||
double cputime()
|
||||
{
|
||||
struct tms buffer;
|
||||
|
||||
if (times(&buffer) == (clock_t)-1)
|
||||
error("times() call failed\n");
|
||||
return (buffer.tms_utime / (60.0 * HZ));
|
||||
}
|
9
splash2/codes/apps/barnes/util.H
Normal file
9
splash2/codes/apps/barnes/util.H
Normal file
|
@ -0,0 +1,9 @@
|
|||
#ifndef _UTIL_H_
#define _UTIL_H_

/* Pseudo-random numbers: seed, next value in [0, 1.0), value in a range. */
void pranset(long seed);
double prand(void);
double xrand(double xl, double xh);

/* CPU time used so far, in minutes. */
double cputime(void);

#endif
|
305
splash2/codes/apps/barnes/vectmath.H
Normal file
305
splash2/codes/apps/barnes/vectmath.H
Normal file
|
@ -0,0 +1,305 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/*
|
||||
* VECTMATH.H: include file for vector/matrix operations.
|
||||
*/
|
||||
|
||||
#ifndef _VECMATH_H_
|
||||
#define _VECMATH_H_
|
||||
|
||||
|
||||
|
||||
#define NDIM 3L
|
||||
|
||||
typedef real vector[NDIM], matrix[NDIM][NDIM];
|
||||
|
||||
/*
|
||||
* Vector operations.
|
||||
*/
|
||||
|
||||
/* CLRV: CLeaR Vector -- v = 0 */
#define CLRV(v) \
{ \
   long _i; \
   for (_i = 0; _i < NDIM; _i++) { \
      (v)[_i] = 0.0; \
   } \
}

/* UNITV: UNIT Vector -- v = unit vector along axis j */
#define UNITV(v,j) \
{ \
   long _i; \
   for (_i = 0; _i < NDIM; _i++) { \
      (v)[_i] = (_i == (j) ? 1.0 : 0.0); \
   } \
}

/* SETV: SET Vector -- v = u */
#define SETV(v,u) \
{ \
   long _i; \
   for (_i = 0; _i < NDIM; _i++) { \
      (v)[_i] = (u)[_i]; \
   } \
}

/* ADDV: ADD Vector -- v = u + w (three components, written out) */
#define ADDV(v,u,w) \
{ \
   real *_vp = (v), *_up = (u), *_wp = (w); \
   _vp[0] = _up[0] + _wp[0]; \
   _vp[1] = _up[1] + _wp[1]; \
   _vp[2] = _up[2] + _wp[2]; \
}

/* SUBV: SUBtract Vector -- v = u - w (three components, written out) */
#define SUBV(v,u,w) \
{ \
   real *_vp = (v), *_up = (u), *_wp = (w); \
   _vp[0] = _up[0] - _wp[0]; \
   _vp[1] = _up[1] - _wp[1]; \
   _vp[2] = _up[2] - _wp[2]; \
}

/* MULVS: MULtiply Vector by Scalar -- v = u * s; s is evaluated per component */
#define MULVS(v,u,s) \
{ \
   real *_vp = (v), *_up = (u); \
   _vp[0] = _up[0] * (s); \
   _vp[1] = _up[1] * (s); \
   _vp[2] = _up[2] * (s); \
}

/* DIVVS: DIVide Vector by Scalar -- v = u / s */
#define DIVVS(v,u,s) \
{ \
   long _i; \
   for (_i = 0; _i < NDIM; _i++) { \
      (v)[_i] = (u)[_i] / (s); \
   } \
}

/* DOTVP: DOT Vector Product -- s = v . u (three components, written out) */
#define DOTVP(s,v,u) \
{ \
   real *_vp = (v), *_up = (u); \
   (s) = _vp[0] * _up[0]; \
   (s) += _vp[1] * _up[1]; \
   (s) += _vp[2] * _up[2]; \
}
|
||||
|
||||
|
||||
#define ABSV(s,v) /* ABSolute value of a Vector */ \
|
||||
{ \
|
||||
double _tmp, sqrt(); \
|
||||
register long _i; \
|
||||
_tmp = 0.0; \
|
||||
for (_i = 0; _i < NDIM; _i++) \
|
||||
_tmp += (v)[_i] * (v)[_i]; \
|
||||
(s) = sqrt(_tmp); \
|
||||
}
|
||||
|
||||
#define DISTV(s,u,v) /* DISTance between Vectors */ \
|
||||
{ \
|
||||
double _tmp, sqrt(); \
|
||||
register long _i; \
|
||||
_tmp = 0.0; \
|
||||
for (_i = 0; _i < NDIM; _i++) \
|
||||
_tmp += ((u)[_i]-(v)[_i]) * ((u)[_i]-(v)[_i]); \
|
||||
(s) = sqrt(_tmp); \
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* CROSSVP: CROSS Vector Product -- v = u x w; v must not alias u or w */
#define CROSSVP(v,u,w) \
{ \
   (v)[0] = (u)[1]*(w)[2] - (u)[2]*(w)[1]; \
   (v)[1] = (u)[2]*(w)[0] - (u)[0]*(w)[2]; \
   (v)[2] = (u)[0]*(w)[1] - (u)[1]*(w)[0]; \
}

/* INCADDV: INCrementally ADD Vector -- v += u */
#define INCADDV(v,u) \
{ \
   long _i; \
   for (_i = 0; _i < NDIM; _i++) { \
      (v)[_i] += (u)[_i]; \
   } \
}

/* INCSUBV: INCrementally SUBtract Vector -- v -= u */
#define INCSUBV(v,u) \
{ \
   long _i; \
   for (_i = 0; _i < NDIM; _i++) { \
      (v)[_i] -= (u)[_i]; \
   } \
}

/* INCMULVS: INCrementally MULtiply Vector by Scalar -- v *= s */
#define INCMULVS(v,s) \
{ \
   long _i; \
   for (_i = 0; _i < NDIM; _i++) { \
      (v)[_i] *= (s); \
   } \
}

/* INCDIVVS: INCrementally DIVide Vector by Scalar -- v /= s */
#define INCDIVVS(v,s) \
{ \
   long _i; \
   for (_i = 0; _i < NDIM; _i++) { \
      (v)[_i] /= (s); \
   } \
}
|
||||
|
||||
/*
|
||||
* Matrix operations.
|
||||
*/
|
||||
|
||||
/* CLRM: CLeaR Matrix -- p = 0 */
#define CLRM(p) \
{ \
   long _i, _j; \
   for (_i = 0; _i < NDIM; _i++) { \
      for (_j = 0; _j < NDIM; _j++) { \
         (p)[_i][_j] = 0.0; \
      } \
   } \
}

/* SETMI: SET Matrix to Identity */
#define SETMI(p) \
{ \
   long _i, _j; \
   for (_i = 0; _i < NDIM; _i++) { \
      for (_j = 0; _j < NDIM; _j++) { \
         (p)[_i][_j] = (_i == _j ? 1.0 : 0.0); \
      } \
   } \
}

/* SETM: SET Matrix -- p = q */
#define SETM(p,q) \
{ \
   long _i, _j; \
   for (_i = 0; _i < NDIM; _i++) { \
      for (_j = 0; _j < NDIM; _j++) { \
         (p)[_i][_j] = (q)[_i][_j]; \
      } \
   } \
}

/* TRANM: TRANspose Matrix -- p = transpose of q; p must not alias q */
#define TRANM(p,q) \
{ \
   long _i, _j; \
   for (_i = 0; _i < NDIM; _i++) { \
      for (_j = 0; _j < NDIM; _j++) { \
         (p)[_i][_j] = (q)[_j][_i]; \
      } \
   } \
}

/* ADDM: ADD Matrix -- p = q + r */
#define ADDM(p,q,r) \
{ \
   long _i, _j; \
   for (_i = 0; _i < NDIM; _i++) { \
      for (_j = 0; _j < NDIM; _j++) { \
         (p)[_i][_j] = (q)[_i][_j] + (r)[_i][_j]; \
      } \
   } \
}

/* SUBM: SUBtract Matrix -- p = q - r */
#define SUBM(p,q,r) \
{ \
   long _i, _j; \
   for (_i = 0; _i < NDIM; _i++) { \
      for (_j = 0; _j < NDIM; _j++) { \
         (p)[_i][_j] = (q)[_i][_j] - (r)[_i][_j]; \
      } \
   } \
}

/* MULM: Multiply Matrix -- p = q * r; p must not alias q or r */
#define MULM(p,q,r) \
{ \
   long _i, _j, _k; \
   for (_i = 0; _i < NDIM; _i++) { \
      for (_j = 0; _j < NDIM; _j++) { \
         (p)[_i][_j] = 0.0; \
         for (_k = 0; _k < NDIM; _k++) { \
            (p)[_i][_j] += (q)[_i][_k] * (r)[_k][_j]; \
         } \
      } \
   } \
}

/* MULMS: MULtiply Matrix by Scalar -- p = q * s */
#define MULMS(p,q,s) \
{ \
   long _i, _j; \
   for (_i = 0; _i < NDIM; _i++) { \
      for (_j = 0; _j < NDIM; _j++) { \
         (p)[_i][_j] = (q)[_i][_j] * (s); \
      } \
   } \
}

/* DIVMS: DIVide Matrix by Scalar -- p = q / s */
#define DIVMS(p,q,s) \
{ \
   long _i, _j; \
   for (_i = 0; _i < NDIM; _i++) { \
      for (_j = 0; _j < NDIM; _j++) { \
         (p)[_i][_j] = (q)[_i][_j] / (s); \
      } \
   } \
}

/* MULMV: MULtiply Matrix by Vector -- v = p * u; v must not alias u */
#define MULMV(v,p,u) \
{ \
   long _i, _j; \
   for (_i = 0; _i < NDIM; _i++) { \
      (v)[_i] = 0.0; \
      for (_j = 0; _j < NDIM; _j++) { \
         (v)[_i] += (p)[_i][_j] * (u)[_j]; \
      } \
   } \
}

/* OUTVP: OUTer Vector Product -- p[i][j] = v[i] * u[j] */
#define OUTVP(p,v,u) \
{ \
   long _i, _j; \
   for (_i = 0; _i < NDIM; _i++) { \
      for (_j = 0; _j < NDIM; _j++) { \
         (p)[_i][_j] = (v)[_i] * (u)[_j]; \
      } \
   } \
}
|
||||
|
||||
/* TRACEM: TRACE of Matrix -- s = sum of the diagonal elements of p */
#define TRACEM(s,p) \
{ \
   long _i; \
   (s) = 0.0; \
   /* integer index, so start it from an integer constant */ \
   for (_i = 0; _i < NDIM; _i++) \
      (s) += (p)[_i][_i]; \
}
|
||||
|
||||
/*
|
||||
* Misc. impure operations.
|
||||
*/
|
||||
|
||||
/* SETVS: SET Vector to Scalar -- every component of v becomes s */
#define SETVS(v,s) \
{ \
   long _i; \
   for (_i = 0; _i < NDIM; _i++) { \
      (v)[_i] = (s); \
   } \
}

/* ADDVS: ADD Vector and Scalar -- v = u with s added to every component */
#define ADDVS(v,u,s) \
{ \
   long _i; \
   for (_i = 0; _i < NDIM; _i++) { \
      (v)[_i] = (u)[_i] + (s); \
   } \
}

/* SETMS: SET Matrix to Scalar -- every element of p becomes s */
#define SETMS(p,s) \
{ \
   long _i, _j; \
   for (_i = 0; _i < NDIM; _i++) { \
      for (_j = 0; _j < NDIM; _j++) { \
         (p)[_i][_j] = (s); \
      } \
   } \
}
|
||||
|
||||
/*
 * Printing macros. Each writes "name = [a,b,c] " to stdout for the first
 * three components of vec. The arguments are parenthesised so that an
 * expression such as base + 1 can be passed for vec.
 */

/* PRTV: PRinT Vector -- components printed with %9.4f */
#define PRTV(name, vec) \
{ \
   fprintf(stdout,"%s = [%9.4f,%9.4f,%9.4f] ",(name),(vec)[0],(vec)[1],(vec)[2]); \
}

/* PRIV: PRint Integer Vector -- components printed with %d */
#define PRIV(name, vec) \
{ \
   fprintf(stdout,"%s = [%d,%d,%d] ",(name),(vec)[0],(vec)[1],(vec)[2]); \
}

/* PROV: PRint Octal Vector -- components printed with %o */
#define PROV(name, vec) \
{ \
   fprintf(stdout,"%s = [%o,%o,%o] ",(name),(vec)[0],(vec)[1],(vec)[2]); \
}

/* PRHV: PRint Hex Vector -- components printed with %x */
#define PRHV(name, vec) \
{ \
   fprintf(stdout,"%s = [%x,%x,%x] ",(name),(vec)[0],(vec)[1],(vec)[2]); \
}
|
||||
|
||||
#endif
|
||||
|
26
splash2/codes/apps/fmm/Makefile
Normal file
26
splash2/codes/apps/fmm/Makefile
Normal file
|
@ -0,0 +1,26 @@
|
|||
# Build description for the FMM application.
TARGET = FMM
OBJS = box.o construct_grid.o cost_zones.o defs.o fmm.o interactions.o \
       memory.o particle.o partition_grid.o

include ../../Makefile.config

# Each .c file depends on the header of the same name.
box.c: box.h
construct_grid.c: construct_grid.h
cost_zones.c: cost_zones.h
defs.c: defs.h
interactions.c: interactions.h
memory.c: memory.h
particle.c: particle.h
partition_grid.c: partition_grid.h

# Object files: the .C source plus every header it pulls in.
fmm.o: fmm.C defs.h memory.h particle.h box.h partition_grid.h \
       cost_zones.h construct_grid.h interactions.h
interactions.o: interactions.C defs.h memory.h particle.h box.h \
       partition_grid.h interactions.h
shell.o: shell.C defs.h memory.h particle.h box.h partition_grid.h \
       interactions.h
construct_grid.o: construct_grid.C defs.h memory.h particle.h box.h \
       partition_grid.h construct_grid.h
cost_zones.o: cost_zones.C defs.h memory.h box.h partition_grid.h cost_zones.h
#orb.o: orb.C defs.h memory.h box.h partition_grid.h orb.h
partition_grid.o: partition_grid.C defs.h memory.h box.h partition_grid.h
box.o: box.C defs.h memory.h particle.h box.h
particle.o: particle.C defs.h memory.h particle.h
memory.o: memory.C defs.h memory.h
defs.o: defs.C defs.h memory.h

# memory.h itself uses declarations from these headers.
memory.h: defs.h particle.h box.h
|
39
splash2/codes/apps/fmm/README.fmm
Normal file
39
splash2/codes/apps/fmm/README.fmm
Normal file
|
@ -0,0 +1,39 @@
|
|||
GENERAL INFORMATION:
|
||||
|
||||
The FMM application implements a parallel adaptive Fast Multipole Method
|
||||
to simulate the interaction of a system of bodies (N-body problem). A
|
||||
description of this implementation can be found in:
|
||||
|
||||
Singh, J. P., et al. A Parallel Adaptive Fast Multipole Method.
|
||||
Proceedings of Supercomputing 93, November 1993.
|
||||
|
||||
RUNNING THE PROGRAM:
|
||||
|
||||
To see how to run the program, please see the comment at the top of the
|
||||
file fmm.C, or run the application with the "-h" command line option.
|
||||
Optional command line parameters allow for individual processor timing
|
||||
statistics to be printed out, as well as the final particle positions.
|
||||
Input parameters can be placed in an input file and redirected through
|
||||
standard input. Of the nine input parameters, the ones which would
|
||||
normally be changed are the number of particles and the number of
|
||||
processors. If other parameters are changed, these changes should be
|
||||
reported in any results that are presented. Sample input files are
|
||||
included in the inputs subdirectory.
|
||||
|
||||
Sample output for a 1 processor run with the input file inputs/input.256
|
||||
and the timing and output flags specified is contained in the file
|
||||
correct.out.
|
||||
|
||||
BASE PROBLEM SIZE:
|
||||
|
||||
The base problem size for a machine with up to 64 processors is 16,384
|
||||
particles. For this many particles, you can use the input file provided
|
||||
(and change only the number of processors).
|
||||
|
||||
DATA DISTRIBUTION:
|
||||
|
||||
Our "POSSIBLE ENHANCEMENT" comments in the source code tell where one
|
||||
might want to distribute data and how. Data distribution, however, does
|
||||
not make much difference to performance on the Stanford DASH
|
||||
multiprocessor.
|
||||
|
367
splash2/codes/apps/fmm/box.C
Normal file
367
splash2/codes/apps/fmm/box.C
Normal file
|
@ -0,0 +1,367 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "defs.h"
|
||||
#include "memory.h"
|
||||
#include "particle.h"
|
||||
#include "box.h"
|
||||
|
||||
/* How many boxes can fit on one line */
|
||||
#define BOXES_PER_LINE 4
|
||||
#define TERMS_PER_LINE 2
|
||||
|
||||
box *Grid = NULL;
|
||||
|
||||
void ZeroBox(long my_id, box *b);
|
||||
|
||||
void
|
||||
CreateBoxes (long my_id, long num_boxes)
|
||||
{
|
||||
long i;
|
||||
|
||||
LOCK(G_Memory->mal_lock);
|
||||
Local[my_id].B_Heap = (box *) G_MALLOC(num_boxes * sizeof(box));
|
||||
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might distribute the
|
||||
B_Heap data across physically distributed memories as desired.
|
||||
|
||||
One way to do this is as follows:
|
||||
|
||||
char *starting_address;
|
||||
char *ending_address;
|
||||
|
||||
starting_address = (char *) Local[my_id].B_Heap;
|
||||
ending_address = (((char *) Local[my_id].B_Heap)
|
||||
+ (num_boxes * sizeof(particle *)) - 1);
|
||||
|
||||
Place all addresses x such that (starting_address <= x < ending_address)
|
||||
on node my_id
|
||||
|
||||
*/
|
||||
|
||||
UNLOCK(G_Memory->mal_lock);
|
||||
Local[my_id].Max_B_Heap = num_boxes;
|
||||
Local[my_id].Index_B_Heap = 0;
|
||||
|
||||
for (i = 0; i < num_boxes; i++) {
|
||||
Local[my_id].B_Heap[i].exp_lock_index = i % (MAX_LOCKS - 1);
|
||||
Local[my_id].B_Heap[i].particle_lock_index = i % (MAX_LOCKS - 1);
|
||||
Local[my_id].B_Heap[i].id = i + ((double) my_id / ID_LIMIT);
|
||||
ZeroBox(my_id, &Local[my_id].B_Heap[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
FreeBoxes (long my_id)
|
||||
{
|
||||
long i;
|
||||
box *b_array;
|
||||
|
||||
b_array = Local[my_id].B_Heap;
|
||||
for (i = 0; i < Local[my_id].Index_B_Heap; i++)
|
||||
ZeroBox(my_id, &b_array[i]);
|
||||
Local[my_id].Index_B_Heap = 0;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ZeroBox (long my_id, box *b)
|
||||
{
|
||||
long i;
|
||||
|
||||
b->type = CHILDLESS;
|
||||
b->num_particles = 0;
|
||||
for (i = 0; i < MAX_PARTICLES_PER_BOX; i++)
|
||||
b->particles[i] = NULL;
|
||||
b->parent = NULL;
|
||||
for (i = 0; i < NUM_OFFSPRING; i++) {
|
||||
b->children[i] = NULL;
|
||||
b->shadow[i] = NULL;
|
||||
}
|
||||
b->num_children = 0;
|
||||
b->construct_synch = 0;
|
||||
b->interaction_synch = 0;
|
||||
b->cost = 0;
|
||||
b->proc = my_id;
|
||||
b->subtree_cost = 0;
|
||||
b->next = NULL;
|
||||
b->prev = NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* InitBox (long my_id, real x_center, real y_center, real length, long level, box *parent)
|
||||
*
|
||||
* Args : the x_center and y_center of the center of the box;
|
||||
* the length of the box;
|
||||
* the level of the box;
|
||||
* the address of b's parent.
|
||||
*
|
||||
* Returns : the address of the newly created box.
|
||||
*
|
||||
* Side Effects : Initializes num_particles to 0, all other pointers to NULL,
|
||||
* and sets the box ID to a unique number. It also creates the space for
|
||||
* the two expansion arrays.
|
||||
*
|
||||
*/
|
||||
box *
|
||||
InitBox (long my_id, real x_center, real y_center, real length, box *parent)
|
||||
{
|
||||
box *b;
|
||||
|
||||
if (Local[my_id].Index_B_Heap == Local[my_id].Max_B_Heap) {
|
||||
LockedPrint("ERROR (P%d) : Ran out of boxes\n", my_id);
|
||||
exit(-1);
|
||||
}
|
||||
b = &Local[my_id].B_Heap[Local[my_id].Index_B_Heap++];
|
||||
b->x_center = x_center;
|
||||
b->y_center = y_center;
|
||||
b->length = length;
|
||||
b->parent = parent;
|
||||
if (parent == NULL)
|
||||
b->level = 0;
|
||||
else
|
||||
b->level = parent->level + 1;
|
||||
return b;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PrintBox (box *b)
|
||||
*
|
||||
* Args : the address of a box, b.
|
||||
*
|
||||
* Returns : nothing.
|
||||
*
|
||||
* Side Effects : Prints to stdout the information stored for b.
|
||||
*
|
||||
*/
|
||||
void
|
||||
PrintBox (box *b)
|
||||
{
|
||||
LOCK(G_Memory->io_lock);
|
||||
fflush(stdout);
|
||||
if (b != NULL) {
|
||||
printf("Info for B%f :\n", b->id);
|
||||
printf(" X center = %.40g\n", b->x_center);
|
||||
printf(" Y center = %.40g\n", b->y_center);
|
||||
printf(" Length = %.40g\n", b->length);
|
||||
printf(" Level = %ld\n", b->level);
|
||||
printf(" Type = %d\n", b->type);
|
||||
printf(" Child Num = %ld\n", b->child_num);
|
||||
if (b->parent == NULL)
|
||||
printf(" Parent = NONE\n");
|
||||
else
|
||||
printf(" Parent = B%f\n", b->parent->id);
|
||||
printf(" Children's IDs : ");
|
||||
if (b->num_children != 0)
|
||||
PrintBoxArrayIds(b->children, b->num_children);
|
||||
else
|
||||
printf("NONE\n");
|
||||
printf(" Sibling's IDs : ");
|
||||
if (b->num_siblings != 0)
|
||||
PrintBoxArrayIds(b->siblings, b->num_siblings);
|
||||
else
|
||||
printf("NONE\n");
|
||||
printf(" Colleagues' IDs : ");
|
||||
PrintBoxArrayIds(b->colleagues, b->num_colleagues);
|
||||
printf(" U List IDs : ");
|
||||
PrintBoxArrayIds(b->u_list, b->num_u_list);
|
||||
printf(" V List IDs : ");
|
||||
PrintBoxArrayIds(b->v_list, b->num_v_list);
|
||||
printf(" W List IDs : ");
|
||||
PrintBoxArrayIds(b->w_list, b->num_w_list);
|
||||
printf(" # of Particles = %ld\n", b->num_particles);
|
||||
printf(" Particles' IDs : ");
|
||||
PrintParticleArrayIds(b->particles, b->num_particles);
|
||||
printf(" Assigned Process ID : %ld\n", b->proc);
|
||||
printf(" Cost : %ld\n", b->cost);
|
||||
printf("\n");
|
||||
}
|
||||
else
|
||||
printf("Box has not been initialized yet.\n\n");
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PrintBoxArrayIds (box_node *b_array[], long array_length)
|
||||
*
|
||||
* Args : the address of the box array, b_array;
|
||||
* the length of the array, array_length.
|
||||
*
|
||||
* Returns : nothing.
|
||||
*
|
||||
* Side Effects : Prints to stdout just the id numbers for every box in
|
||||
* b_array.
|
||||
*
|
||||
*/
|
||||
void
|
||||
PrintBoxArrayIds (box *b_array[], long array_length)
|
||||
{
|
||||
long i;
|
||||
long tab_count;
|
||||
|
||||
tab_count = 0;
|
||||
for (i = 0; i < array_length; i++) {
|
||||
if (tab_count == 0) {
|
||||
printf("\n");
|
||||
tab_count = BOXES_PER_LINE;
|
||||
}
|
||||
if (b_array[i] != NULL)
|
||||
printf("\tB%f", b_array[i]->id);
|
||||
tab_count -= 1;
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PrintExpansionTerms (real expansion[])
|
||||
*
|
||||
* Args : the array of expansion terms, expansion.
|
||||
*
|
||||
* Returns : nothing.
|
||||
*
|
||||
* Side Effects : Prints to stdout the contents of expansion.
|
||||
*
|
||||
*/
|
||||
void
|
||||
PrintExpansionTerms (complex expansion[])
|
||||
{
|
||||
long i;
|
||||
long tab_count = 0;
|
||||
|
||||
for (i = 0; i < Expansion_Terms; i++) {
|
||||
if (tab_count == 0) {
|
||||
printf("\n");
|
||||
tab_count = TERMS_PER_LINE;
|
||||
}
|
||||
if (expansion[i].i >= (real) 0.0)
|
||||
printf("\ta%ld = %.3e + %.3ei", i, expansion[i].r, expansion[i].i);
|
||||
else
|
||||
printf("\ta%ld = %.3e - %.3ei", i, expansion[i].r, -expansion[i].i);
|
||||
tab_count -= 1;
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ListIterate (long my_id, box *b, box **list, long length, list_function function)
|
||||
{
|
||||
long i;
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
if (list[i] == NULL) {
|
||||
LockedPrint("ERROR (P%d) : NULL list entry\n", my_id);
|
||||
exit(-1);
|
||||
}
|
||||
(*function)(my_id, list[i], b);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AdjacentBoxes (box *b1, box *b2)
|
||||
*
|
||||
* Args : two potentially adjacent boxes, b1 and b2.
|
||||
*
|
||||
* Returns : TRUE, if boxes are adjacent, FALSE if not.
|
||||
*
|
||||
* Side Effects : none.
|
||||
*
|
||||
* Comments : Two boxes are adjacent if their centers are separated in either
|
||||
* the x or y directions by (1/2 the length of b1) + (1/2 length of b2),
|
||||
* and separated in the other direction by a distance less than or equal
|
||||
* to (1/2 the length of b1) + (1/2 the length of b2).
|
||||
*
|
||||
* NOTE : By this definition, parents are NOT adjacent to their children.
|
||||
*/
|
||||
long
|
||||
AdjacentBoxes (box *b1, box *b2)
|
||||
{
|
||||
real exact_separation;
|
||||
real x_separation;
|
||||
real y_separation;
|
||||
long ret_val;
|
||||
|
||||
exact_separation = (b1->length / (real) 2.0) + (b2->length / (real) 2.0);
|
||||
x_separation = (real) fabs((double)(b1->x_center - b2->x_center));
|
||||
y_separation = (real) fabs((double)(b1->y_center - b2->y_center));
|
||||
|
||||
if ((x_separation == exact_separation) &&
|
||||
(y_separation <= exact_separation))
|
||||
ret_val = TRUE;
|
||||
else
|
||||
if ((y_separation == exact_separation) &&
|
||||
(x_separation <= exact_separation))
|
||||
ret_val = TRUE;
|
||||
else
|
||||
ret_val = FALSE;
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* WellSeparatedBoxes (box *b1, box *b2)
|
||||
*
|
||||
* Args : Two potentially well separated boxes, b1 and b2.
|
||||
*
|
||||
* Returns : TRUE, if the two boxes are well separated, and FALSE if not.
|
||||
*
|
||||
* Side Effects : none.
|
||||
*
|
||||
* Comments : Well separated means that the two boxes are separated by the
|
||||
* length of the boxes. If one of the boxes is bigger than the other,
|
||||
* the smaller box is given the length of the larger box. This means
|
||||
* that the centers of the two boxes, regardless of their relative size,
|
||||
* must be separated in the x or y direction (or both) by at least
|
||||
* twice the length of the biggest box.
|
||||
*
|
||||
*/
|
||||
long
|
||||
WellSeparatedBoxes (box *b1, box *b2)
|
||||
{
|
||||
real min_ws_distance;
|
||||
real x_separation;
|
||||
real y_separation;
|
||||
long ret_val;
|
||||
|
||||
if (b1->length > b2->length)
|
||||
min_ws_distance = b1->length * (real) 2.0;
|
||||
else
|
||||
min_ws_distance = b2->length * (real) 2.0;
|
||||
|
||||
x_separation = (real) fabs((double)(b1->x_center - b2->x_center));
|
||||
y_separation = (real) fabs((double)(b1->y_center - b2->y_center));
|
||||
|
||||
if ((x_separation >= min_ws_distance) || (y_separation >= min_ws_distance))
|
||||
ret_val = TRUE;
|
||||
else
|
||||
ret_val = FALSE;
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
|
||||
#undef BOXES_PER_LINE
|
||||
#undef TERMS_PER_LINE
|
||||
|
134
splash2/codes/apps/fmm/box.H
Normal file
134
splash2/codes/apps/fmm/box.H
Normal file
|
@ -0,0 +1,134 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef _Box_H
|
||||
#define _Box_H 1
|
||||
|
||||
#include "defs.h"
|
||||
#include "particle.h"
|
||||
|
||||
/* This definition sets the maximum number of particles allowed per box. */
|
||||
#define MAX_PARTICLES_PER_BOX 40
|
||||
|
||||
/* This definition sets the number of subdivisions (offspring) of a box. */
|
||||
#define NUM_OFFSPRING 4
|
||||
#define MAX_SIBLINGS (NUM_OFFSPRING - 1)
|
||||
#define MAX_COLLEAGUES 8
|
||||
#define MAX_U_LIST 20
|
||||
#define MAX_V_LIST 27
|
||||
#define MAX_W_LIST 30
|
||||
#define MAX_EXPANSION_TERMS 40
|
||||
|
||||
typedef struct _Box box;
|
||||
typedef struct _Box_Node box_node;
|
||||
|
||||
typedef void (*list_function)(long my_id, box *list_box, box *b);
|
||||
|
||||
typedef enum { CHILDLESS, PARENT } box_type;
|
||||
|
||||
#define ID_LIMIT 1000000
|
||||
|
||||
/* Every box has :
|
||||
* 1. A unique ID number (made up of a unique ID number per processor plus
|
||||
* the ID of the processor that created the box)
|
||||
* 2.- 3. An x and y position for its center
|
||||
* 4. The length of the box (measured as the length of one of its sides)
|
||||
* 5. The level of ancestry of the box (how many parents do you have to
|
||||
* visit before the first box is found?)
|
||||
* 6. The number of particles in the box
|
||||
* 7. A list of those particles
|
||||
* 8. A pointer to its parent
|
||||
* 9. The number of children
|
||||
* 10. A list of its children
|
||||
* 11. The number of siblings
|
||||
* 12. A list of its siblings
|
||||
* 13. An array of its colleagues (with a count of the entries in use)
|
||||
* 14. An array (u_list) representing list 1 in RR #496
|
||||
* 15. An array (v_list) representing list 2 in RR #496
|
||||
* 16. An array (w_list) representing list 3 in RR #496
|
||||
* 17. An array of its multipole expansion terms.
|
||||
* 18. An array of its local expansion terms.
|
||||
* 19. The id of the processor that is working on the box.
|
||||
* 20. The amount of computational work associated with the box.
|
||||
*/
|
||||
|
||||
struct _Box /* One node of the box tree; the instances live in the per-process B_Heap arrays. */
|
||||
{
|
||||
double id; /* heap position plus (creating process id / ID_LIMIT) as the fraction */
|
||||
real x_center; /* x coordinate of the center of the box */
|
||||
real y_center; /* y coordinate of the center of the box */
|
||||
real length; /* length of one side of the box */
|
||||
long level; /* 0 for a box without parent, otherwise parent's level + 1 */
|
||||
box_type type; /* CHILDLESS or PARENT */
|
||||
particle *particles[MAX_PARTICLES_PER_BOX + 1]; /* particles held by the box (one slot more than the nominal maximum) */
|
||||
long num_particles; /* number of entries of particles[] in use */
|
||||
box *parent; /* enclosing box, NULL if there is none */
|
||||
long child_num; /* presumably this box's slot among its parent's children -- TODO confirm */
|
||||
box *shadow[NUM_OFFSPRING]; /* NOTE(review): purpose not visible in box.C; only ever reset to NULL there */
|
||||
box *children[NUM_OFFSPRING]; /* child boxes, NULL where there is none */
|
||||
long num_children; /* number of children */
|
||||
box *siblings[MAX_SIBLINGS]; /* the other children of the same parent */
|
||||
long num_siblings; /* number of entries of siblings[] in use */
|
||||
box *colleagues[MAX_COLLEAGUES]; /* colleague boxes */
|
||||
long num_colleagues; /* number of entries of colleagues[] in use */
|
||||
box *u_list[MAX_U_LIST]; /* U list */
|
||||
long num_u_list; /* number of entries of u_list[] in use */
|
||||
box *v_list[MAX_V_LIST]; /* V list */
|
||||
long num_v_list; /* number of entries of v_list[] in use */
|
||||
box *w_list[MAX_W_LIST]; /* W list */
|
||||
long num_w_list; /* number of entries of w_list[] in use */
|
||||
complex mp_expansion[MAX_EXPANSION_TERMS]; /* multipole expansion terms */
|
||||
complex local_expansion[MAX_EXPANSION_TERMS]; /* local expansion terms */
|
||||
complex x_expansion[MAX_EXPANSION_TERMS]; /* NOTE(review): a third expansion array; its role is not visible here */
|
||||
long exp_lock_index; /* slot in the shared lock array guarding this box's accumulated data */
|
||||
long particle_lock_index; /* lock slot, presumably guarding the particle list -- TODO confirm */
|
||||
volatile long construct_synch; /* synchronization counter, presumably for grid construction -- TODO confirm */
|
||||
volatile long interaction_synch; /* counts children that have reported to this box; parents spin on it */
|
||||
long proc; /* id of the process the box is assigned to */
|
||||
long cost; /* amount of computational work associated with the box itself */
|
||||
long u_cost; /* presumably the share of cost due to the U list -- TODO confirm */
|
||||
long v_cost; /* presumably the share of cost due to the V list -- TODO confirm */
|
||||
long w_cost; /* presumably the share of cost due to the W list -- TODO confirm */
|
||||
long p_cost; /* presumably the share of cost due to the particles -- TODO confirm */
|
||||
long subtree_cost; /* cost of the box plus the subtree_cost of all its children */
|
||||
box *next; /* list link, reset to NULL whenever the box is zeroed */
|
||||
box *prev; /* list link, reset to NULL whenever the box is zeroed */
|
||||
box *link1; /* NOTE(review): extra link, use not visible here */
|
||||
box *link2; /* NOTE(review): extra link, use not visible here */
|
||||
};
|
||||
|
||||
|
||||
/* This structure is one node of a singly linked list of boxes (typedef'd as box_node) */
|
||||
struct _Box_Node
|
||||
{
|
||||
box *data; /* the box this node refers to */
|
||||
struct _Box_Node *next; /* the following node of the list */
|
||||
};
|
||||
|
||||
extern box *Grid;
|
||||
|
||||
extern void CreateBoxes(long my_id, long num_boxes);
|
||||
extern void FreeBoxes(long my_id);
|
||||
extern box *InitBox(long my_id, real x_center, real y_center, real length, box *parent);
|
||||
extern void PrintBox(box *b);
|
||||
extern void PrintBoxArrayIds(box *b_array[], long array_length);
|
||||
extern void PrintExpansionTerms(complex expansion[]);
|
||||
|
||||
extern void ListIterate(long my_id, box *b, box **list, long length, list_function function);
|
||||
extern long AdjacentBoxes(box *b1, box *b2);
|
||||
extern long WellSeparatedBoxes(box *b1, box *b2);
|
||||
|
||||
#endif /* _Box_H */
|
1119
splash2/codes/apps/fmm/construct_grid.C
Normal file
1119
splash2/codes/apps/fmm/construct_grid.C
Normal file
File diff suppressed because it is too large
Load diff
25
splash2/codes/apps/fmm/construct_grid.H
Normal file
25
splash2/codes/apps/fmm/construct_grid.H
Normal file
|
@ -0,0 +1,25 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef _Construct_Grid_H
|
||||
#define _Construct_Grid_H 1
|
||||
|
||||
extern void ConstructGrid(long my_id, time_info *local_time, long time_all);
|
||||
extern void ConstructLists(long my_id, time_info *local_time, long time_all);
|
||||
extern void DestroyGrid(long my_id, time_info *local_time, long time_all);
|
||||
extern void PrintGrid(long my_id);
|
||||
|
||||
#endif /* _Construct_Grid_H */
|
279
splash2/codes/apps/fmm/correct.out
Normal file
279
splash2/codes/apps/fmm/correct.out
Normal file
|
@ -0,0 +1,279 @@
|
|||
Expected output for input parameters from file inputs/input.256 with
|
||||
output and timing flags specified:
|
||||
|
||||
|
||||
Creating a two cluster, non uniform distribution for 256 particles
|
||||
Starting FMM with 1 processor
|
||||
Finished FMM
|
||||
PROCESS STATISTICS
|
||||
Track Tree List Part Pass Inter Bar Intra Other
|
||||
Proc Time Time Time Time Time Time Time Time Time
|
||||
0 0 0 0 0 0 0 0 0 0
|
||||
|
||||
TIMING INFORMATION
|
||||
Start time : 782441462
|
||||
Initialization finish time : 782441462
|
||||
Overall finish time : 782441462
|
||||
Total time with initialization : 0
|
||||
Total time without initialization : 0
|
||||
|
||||
Total time for steps 3 to 5 : 0
|
||||
|
||||
PARTICLE POSITIONS
|
||||
|
||||
P 0 : Pos = ( -1.43411, -2.09109)
|
||||
P 1 : Pos = ( -1.54648, -0.68562)
|
||||
P 2 : Pos = ( -0.64031, 0.16307)
|
||||
P 3 : Pos = ( 0.05947, -0.66152)
|
||||
P 4 : Pos = ( -0.70371, -2.32670)
|
||||
P 5 : Pos = ( -0.89875, -1.48421)
|
||||
P 6 : Pos = ( -1.54906, -0.62613)
|
||||
P 7 : Pos = ( 0.69844, 1.10018)
|
||||
P 8 : Pos = ( -0.54329, -1.61378)
|
||||
P 9 : Pos = ( -0.08989, -2.35112)
|
||||
P 10 : Pos = ( -1.33669, -0.33496)
|
||||
P 11 : Pos = ( -0.19485, -0.99712)
|
||||
P 12 : Pos = ( -1.08720, -0.97597)
|
||||
P 13 : Pos = ( -0.88950, -1.78681)
|
||||
P 14 : Pos = ( -1.92000, -2.86092)
|
||||
P 15 : Pos = ( 0.01118, -1.11592)
|
||||
P 16 : Pos = ( 0.29956, -2.20402)
|
||||
P 17 : Pos = ( -1.73261, -0.70957)
|
||||
P 18 : Pos = ( 0.72752, 0.09104)
|
||||
P 19 : Pos = ( -1.76686, -1.05004)
|
||||
P 20 : Pos = ( -0.62225, -0.90594)
|
||||
P 21 : Pos = ( -0.94715, -1.58968)
|
||||
P 22 : Pos = ( -0.03266, -1.04470)
|
||||
P 23 : Pos = ( -0.06388, -0.13640)
|
||||
P 24 : Pos = ( -1.66374, -1.53045)
|
||||
P 25 : Pos = ( -1.77486, -1.16405)
|
||||
P 26 : Pos = ( -2.39597, -1.45824)
|
||||
P 27 : Pos = ( -1.61490, -1.31192)
|
||||
P 28 : Pos = ( -0.11696, -0.87015)
|
||||
P 29 : Pos = ( -0.48613, -1.49041)
|
||||
P 30 : Pos = ( -1.46477, -2.54577)
|
||||
P 31 : Pos = ( -1.09388, -1.07751)
|
||||
P 32 : Pos = ( -1.46100, -1.17696)
|
||||
P 33 : Pos = ( -0.18779, -0.55415)
|
||||
P 34 : Pos = ( 0.23057, 0.06852)
|
||||
P 35 : Pos = ( -0.21021, -0.61339)
|
||||
P 36 : Pos = ( -2.96834, -0.29539)
|
||||
P 37 : Pos = ( -1.33559, -0.77310)
|
||||
P 38 : Pos = ( -1.45288, -0.58590)
|
||||
P 39 : Pos = ( -2.82349, -1.92800)
|
||||
P 40 : Pos = ( -0.24231, -2.37768)
|
||||
P 41 : Pos = ( -0.52698, -0.93986)
|
||||
P 42 : Pos = ( -0.51194, -0.67306)
|
||||
P 43 : Pos = ( -1.46181, -0.29485)
|
||||
P 44 : Pos = ( -1.51229, -1.06319)
|
||||
P 45 : Pos = ( -0.79540, -1.29090)
|
||||
P 46 : Pos = ( -0.57868, -0.09248)
|
||||
P 47 : Pos = ( -0.33401, -2.44589)
|
||||
P 48 : Pos = ( -0.01213, -1.26073)
|
||||
P 49 : Pos = ( -1.38279, -0.56367)
|
||||
P 50 : Pos = ( -1.90767, -1.59006)
|
||||
P 51 : Pos = ( -1.10363, -0.76771)
|
||||
P 52 : Pos = ( -1.05322, -0.70645)
|
||||
P 53 : Pos = ( -0.43662, -1.06196)
|
||||
P 54 : Pos = ( -0.99445, 0.41950)
|
||||
P 55 : Pos = ( -1.08575, -0.74978)
|
||||
P 56 : Pos = ( -1.54303, -1.20363)
|
||||
P 57 : Pos = ( -1.23607, -1.10169)
|
||||
P 58 : Pos = ( -0.88698, -1.96075)
|
||||
P 59 : Pos = ( -2.91866, -2.03021)
|
||||
P 60 : Pos = ( 1.53310, -1.33704)
|
||||
P 61 : Pos = ( -0.49453, -1.23276)
|
||||
P 62 : Pos = ( -1.73482, 0.07055)
|
||||
P 63 : Pos = ( -1.05005, -0.24271)
|
||||
P 64 : Pos = ( -1.31181, -1.24434)
|
||||
P 65 : Pos = ( -1.79726, 1.62154)
|
||||
P 66 : Pos = ( -1.56322, -1.17794)
|
||||
P 67 : Pos = ( -1.49587, -1.82000)
|
||||
P 68 : Pos = ( -0.25687, -1.80248)
|
||||
P 69 : Pos = ( -0.87176, -0.54912)
|
||||
P 70 : Pos = ( -2.08087, -2.36369)
|
||||
P 71 : Pos = ( -2.77277, -0.13150)
|
||||
P 72 : Pos = ( -0.88101, -1.35167)
|
||||
P 73 : Pos = ( -0.71981, -0.94017)
|
||||
P 74 : Pos = ( 0.11335, -0.56285)
|
||||
P 75 : Pos = ( -0.93693, -1.23228)
|
||||
P 76 : Pos = ( -1.55307, -1.34658)
|
||||
P 77 : Pos = ( -0.37223, -1.32314)
|
||||
P 78 : Pos = ( -0.74784, -0.12910)
|
||||
P 79 : Pos = ( -0.17029, -2.23523)
|
||||
P 80 : Pos = ( -2.19951, 1.85571)
|
||||
P 81 : Pos = ( -1.83973, -1.29899)
|
||||
P 82 : Pos = ( 0.46179, -1.83450)
|
||||
P 83 : Pos = ( -0.56821, -1.48287)
|
||||
P 84 : Pos = ( -1.52386, -1.91689)
|
||||
P 85 : Pos = ( -0.55720, -1.03627)
|
||||
P 86 : Pos = ( -1.02957, -3.71620)
|
||||
P 87 : Pos = ( -4.08440, 1.61353)
|
||||
P 88 : Pos = ( -3.78035, 0.03563)
|
||||
P 89 : Pos = ( -0.54816, -1.41596)
|
||||
P 90 : Pos = ( -1.94317, -0.41962)
|
||||
P 91 : Pos = ( -1.08228, -0.90690)
|
||||
P 92 : Pos = ( -0.89155, -0.63790)
|
||||
P 93 : Pos = ( -0.92724, -0.71553)
|
||||
P 94 : Pos = ( -0.96707, -1.42639)
|
||||
P 95 : Pos = ( -1.39903, -1.16314)
|
||||
P 96 : Pos = ( -0.57333, -0.74190)
|
||||
P 97 : Pos = ( -1.38755, -0.99592)
|
||||
P 98 : Pos = ( 1.85628, 1.54037)
|
||||
P 99 : Pos = ( -0.97737, -0.57102)
|
||||
P 100 : Pos = ( -0.50769, -1.60342)
|
||||
P 101 : Pos = ( -0.84604, -1.55463)
|
||||
P 102 : Pos = ( 0.21192, -0.95452)
|
||||
P 103 : Pos = ( -0.51392, -0.74877)
|
||||
P 104 : Pos = ( -0.81335, -1.56088)
|
||||
P 105 : Pos = ( -1.49047, -1.33111)
|
||||
P 106 : Pos = ( -1.01388, -1.32191)
|
||||
P 107 : Pos = ( -3.12680, -0.02822)
|
||||
P 108 : Pos = ( -1.46754, -0.10543)
|
||||
P 109 : Pos = ( -0.97791, -2.03745)
|
||||
P 110 : Pos = ( 0.92297, -1.48565)
|
||||
P 111 : Pos = ( -1.35069, -1.72285)
|
||||
P 112 : Pos = ( 0.49270, -0.67037)
|
||||
P 113 : Pos = ( -1.63986, -1.04857)
|
||||
P 114 : Pos = ( -0.35524, -1.12787)
|
||||
P 115 : Pos = ( -1.72972, 0.63613)
|
||||
P 116 : Pos = ( -1.84838, -0.34173)
|
||||
P 117 : Pos = ( -1.57914, -0.95206)
|
||||
P 118 : Pos = ( -0.55701, -0.85381)
|
||||
P 119 : Pos = ( -1.73082, 0.12909)
|
||||
P 120 : Pos = ( -0.81612, -1.19132)
|
||||
P 121 : Pos = ( -0.88562, 0.04683)
|
||||
P 122 : Pos = ( -1.17066, -0.85713)
|
||||
P 123 : Pos = ( -0.58563, -0.56109)
|
||||
P 124 : Pos = ( -2.95537, -3.89308)
|
||||
P 125 : Pos = ( -0.34982, -1.29778)
|
||||
P 126 : Pos = ( -0.46937, -1.15248)
|
||||
P 127 : Pos = ( -0.17294, -0.58438)
|
||||
P 128 : Pos = ( 0.56589, -0.09110)
|
||||
P 129 : Pos = ( 0.45352, 1.31437)
|
||||
P 130 : Pos = ( 1.35968, 2.16306)
|
||||
P 131 : Pos = ( 2.05947, 1.33847)
|
||||
P 132 : Pos = ( 1.29629, -0.32670)
|
||||
P 133 : Pos = ( 1.10125, 0.51578)
|
||||
P 134 : Pos = ( 0.45094, 1.37386)
|
||||
P 135 : Pos = ( 2.69843, 3.10017)
|
||||
P 136 : Pos = ( 1.45671, 0.38621)
|
||||
P 137 : Pos = ( 1.91010, -0.35113)
|
||||
P 138 : Pos = ( 0.66331, 1.66503)
|
||||
P 139 : Pos = ( 1.80515, 1.00288)
|
||||
P 140 : Pos = ( 0.91279, 1.02402)
|
||||
P 141 : Pos = ( 1.11050, 0.21320)
|
||||
P 142 : Pos = ( 0.07999, -0.86092)
|
||||
P 143 : Pos = ( 2.01118, 0.88407)
|
||||
P 144 : Pos = ( 2.29956, -0.20402)
|
||||
P 145 : Pos = ( 0.26739, 1.29042)
|
||||
P 146 : Pos = ( 2.72752, 2.09102)
|
||||
P 147 : Pos = ( 0.23314, 0.94995)
|
||||
P 148 : Pos = ( 1.37775, 1.09405)
|
||||
P 149 : Pos = ( 1.05285, 0.41031)
|
||||
P 150 : Pos = ( 1.96734, 0.95529)
|
||||
P 151 : Pos = ( 1.93612, 1.86359)
|
||||
P 152 : Pos = ( 0.33626, 0.46954)
|
||||
P 153 : Pos = ( 0.22514, 0.83594)
|
||||
P 154 : Pos = ( -0.39598, 0.54174)
|
||||
P 155 : Pos = ( 0.38510, 0.68808)
|
||||
P 156 : Pos = ( 1.88304, 1.12984)
|
||||
P 157 : Pos = ( 1.51387, 0.50958)
|
||||
P 158 : Pos = ( 0.53522, -0.54578)
|
||||
P 159 : Pos = ( 0.90612, 0.92249)
|
||||
P 160 : Pos = ( 0.53900, 0.82303)
|
||||
P 161 : Pos = ( 1.81221, 1.44585)
|
||||
P 162 : Pos = ( 2.23056, 2.06850)
|
||||
P 163 : Pos = ( 1.78979, 1.38660)
|
||||
P 164 : Pos = ( -0.96834, 1.70461)
|
||||
P 165 : Pos = ( 0.66441, 1.22689)
|
||||
P 166 : Pos = ( 0.54712, 1.41409)
|
||||
P 167 : Pos = ( -0.82349, 0.07197)
|
||||
P 168 : Pos = ( 1.75769, -0.37769)
|
||||
P 169 : Pos = ( 1.47302, 1.06013)
|
||||
P 170 : Pos = ( 1.48806, 1.32693)
|
||||
P 171 : Pos = ( 0.53819, 1.70514)
|
||||
P 172 : Pos = ( 0.48771, 0.93680)
|
||||
P 173 : Pos = ( 1.20460, 0.70910)
|
||||
P 174 : Pos = ( 1.42132, 1.90752)
|
||||
P 175 : Pos = ( 1.66599, -0.44589)
|
||||
P 176 : Pos = ( 1.98787, 0.73926)
|
||||
P 177 : Pos = ( 0.61720, 1.43632)
|
||||
P 178 : Pos = ( 0.09233, 0.40993)
|
||||
P 179 : Pos = ( 0.89637, 1.23228)
|
||||
P 180 : Pos = ( 0.94678, 1.29354)
|
||||
P 181 : Pos = ( 1.56338, 0.93803)
|
||||
P 182 : Pos = ( 1.00555, 2.41949)
|
||||
P 183 : Pos = ( 0.91425, 1.25021)
|
||||
P 184 : Pos = ( 0.45697, 0.79637)
|
||||
P 185 : Pos = ( 0.76393, 0.89831)
|
||||
P 186 : Pos = ( 1.11302, 0.03926)
|
||||
P 187 : Pos = ( -0.91866, -0.03024)
|
||||
P 188 : Pos = ( 3.53310, 0.66295)
|
||||
P 189 : Pos = ( 1.50547, 0.76724)
|
||||
P 190 : Pos = ( 0.26517, 2.07054)
|
||||
P 191 : Pos = ( 0.94994, 1.75728)
|
||||
P 192 : Pos = ( 0.68818, 0.75566)
|
||||
P 193 : Pos = ( 0.20274, 3.62154)
|
||||
P 194 : Pos = ( 0.43678, 0.82206)
|
||||
P 195 : Pos = ( 0.50413, 0.17999)
|
||||
P 196 : Pos = ( 1.74312, 0.19752)
|
||||
P 197 : Pos = ( 1.12824, 1.45088)
|
||||
P 198 : Pos = ( -0.08088, -0.36372)
|
||||
P 199 : Pos = ( -0.77277, 1.86850)
|
||||
P 200 : Pos = ( 1.11899, 0.64833)
|
||||
P 201 : Pos = ( 1.28019, 1.05982)
|
||||
P 202 : Pos = ( 2.11335, 1.43714)
|
||||
P 203 : Pos = ( 1.06307, 0.76771)
|
||||
P 204 : Pos = ( 0.44693, 0.65341)
|
||||
P 205 : Pos = ( 1.62777, 0.67686)
|
||||
P 206 : Pos = ( 1.25216, 1.87090)
|
||||
P 207 : Pos = ( 1.82971, -0.23524)
|
||||
P 208 : Pos = ( -0.19951, 3.85571)
|
||||
P 209 : Pos = ( 0.16027, 0.70100)
|
||||
P 210 : Pos = ( 2.46179, 0.16550)
|
||||
P 211 : Pos = ( 1.43179, 0.51713)
|
||||
P 212 : Pos = ( 0.47614, 0.08310)
|
||||
P 213 : Pos = ( 1.44280, 0.96373)
|
||||
P 214 : Pos = ( 0.97043, -1.71619)
|
||||
P 215 : Pos = ( -2.08440, 3.61353)
|
||||
P 216 : Pos = ( -1.78035, 2.03562)
|
||||
P 217 : Pos = ( 1.45184, 0.58404)
|
||||
P 218 : Pos = ( 0.05683, 1.58038)
|
||||
P 219 : Pos = ( 0.91772, 1.09309)
|
||||
P 220 : Pos = ( 1.10845, 1.36209)
|
||||
P 221 : Pos = ( 1.07276, 1.28446)
|
||||
P 222 : Pos = ( 1.03292, 0.57361)
|
||||
P 223 : Pos = ( 0.60097, 0.83686)
|
||||
P 224 : Pos = ( 1.42667, 1.25810)
|
||||
P 225 : Pos = ( 0.61245, 1.00407)
|
||||
P 226 : Pos = ( 3.85628, 3.54038)
|
||||
P 227 : Pos = ( 1.02262, 1.42898)
|
||||
P 228 : Pos = ( 1.49231, 0.39658)
|
||||
P 229 : Pos = ( 1.15396, 0.44536)
|
||||
P 230 : Pos = ( 2.21192, 1.04546)
|
||||
P 231 : Pos = ( 1.48608, 1.25121)
|
||||
P 232 : Pos = ( 1.18665, 0.43911)
|
||||
P 233 : Pos = ( 0.50953, 0.66889)
|
||||
P 234 : Pos = ( 0.98612, 0.67809)
|
||||
P 235 : Pos = ( -1.12680, 1.97178)
|
||||
P 236 : Pos = ( 0.53246, 1.89456)
|
||||
P 237 : Pos = ( 1.02209, -0.03745)
|
||||
P 238 : Pos = ( 2.92297, 0.51435)
|
||||
P 239 : Pos = ( 0.64931, 0.27714)
|
||||
P 240 : Pos = ( 2.49270, 1.32961)
|
||||
P 241 : Pos = ( 0.36014, 0.95143)
|
||||
P 242 : Pos = ( 1.64475, 0.87212)
|
||||
P 243 : Pos = ( 0.27028, 2.63612)
|
||||
P 244 : Pos = ( 0.15161, 1.65826)
|
||||
P 245 : Pos = ( 0.42086, 1.04793)
|
||||
P 246 : Pos = ( 1.44298, 1.14618)
|
||||
P 247 : Pos = ( 0.26918, 2.12908)
|
||||
P 248 : Pos = ( 1.18388, 0.80868)
|
||||
P 249 : Pos = ( 1.11438, 2.04683)
|
||||
P 250 : Pos = ( 0.82934, 1.14285)
|
||||
P 251 : Pos = ( 1.41437, 1.43891)
|
||||
P 252 : Pos = ( -0.95537, -1.89308)
|
||||
P 253 : Pos = ( 1.65018, 0.70221)
|
||||
P 254 : Pos = ( 1.53063, 0.84752)
|
||||
P 255 : Pos = ( 1.82706, 1.41561)
|
128
splash2/codes/apps/fmm/cost_zones.C
Normal file
128
splash2/codes/apps/fmm/cost_zones.C
Normal file
|
@ -0,0 +1,128 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#include "defs.h"
|
||||
#include "memory.h"
|
||||
#include "box.h"
|
||||
#include "partition_grid.h"
|
||||
#include "cost_zones.h"
|
||||
|
||||
#define NUM_DIRECTIONS 4
|
||||
|
||||
typedef enum { RIGHT, LEFT, UP, DOWN } direction;
|
||||
|
||||
static long Child_Sequence[NUM_DIRECTIONS][NUM_OFFSPRING] = /* [dir][step] : which child slot is visited at each step when a box is traversed in direction dir */
|
||||
{
|
||||
{ 0, 1, 2, 3 }, /* RIGHT */
|
||||
{ 2, 3, 0, 1 }, /* LEFT */
|
||||
{ 0, 3, 2, 1 }, /* UP */
|
||||
{ 2, 1, 0, 3 }, /* DOWN */
|
||||
};
|
||||
static long Direction_Sequence[NUM_DIRECTIONS][NUM_OFFSPRING] = /* [dir][step] : direction handed to the recursive traversal of the child visited at that step */
|
||||
{
|
||||
{ UP, RIGHT, RIGHT, DOWN }, /* RIGHT */
|
||||
{ DOWN, LEFT, LEFT, UP }, /* LEFT */
|
||||
{ RIGHT, UP, UP, LEFT }, /* UP */
|
||||
{ LEFT, DOWN, DOWN, RIGHT }, /* DOWN */
|
||||
};
|
||||
|
||||
void ComputeSubTreeCosts(long my_id, box *b);
|
||||
void CostZonesHelper(long my_id, box *b, long work, direction dir);
|
||||
|
||||
|
||||
void
|
||||
CostZones (long my_id)
|
||||
{
|
||||
PartitionIterate(my_id, ComputeSubTreeCosts, BOTTOM);
|
||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
||||
Local[my_id].Total_Work = Grid->subtree_cost;
|
||||
Local[my_id].Min_Work = ((Local[my_id].Total_Work / Number_Of_Processors)
|
||||
* my_id);
|
||||
if (my_id == (Number_Of_Processors - 1))
|
||||
Local[my_id].Max_Work = Local[my_id].Total_Work;
|
||||
else
|
||||
Local[my_id].Max_Work = (Local[my_id].Min_Work
|
||||
+ (Local[my_id].Total_Work
|
||||
/ Number_Of_Processors));
|
||||
InitPartition(my_id);
|
||||
CostZonesHelper(my_id, Grid, 0, RIGHT);
|
||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ComputeSubTreeCosts (long my_id, box *b)
|
||||
{
|
||||
box *pb;
|
||||
|
||||
if (b->type == PARENT) {
|
||||
while (b->interaction_synch != b->num_children) {
|
||||
}
|
||||
}
|
||||
b->interaction_synch = 0;
|
||||
ComputeCostOfBox(b);
|
||||
b->subtree_cost += b->cost;
|
||||
pb = b->parent;
|
||||
if (pb != NULL) {
|
||||
ALOCK(G_Memory->lock_array, pb->exp_lock_index);
|
||||
pb->subtree_cost += b->subtree_cost;
|
||||
pb->interaction_synch += 1;
|
||||
AULOCK(G_Memory->lock_array, pb->exp_lock_index);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
CostZonesHelper (long my_id, box *b, long work, direction dir)
|
||||
{
|
||||
box *cb;
|
||||
long i;
|
||||
long *next_child;
|
||||
long *child_dir;
|
||||
|
||||
if (b->type == CHILDLESS) {
|
||||
if (work >= Local[my_id].Min_Work)
|
||||
InsertBoxInPartition(my_id, b);
|
||||
}
|
||||
else {
|
||||
next_child = Child_Sequence[dir];
|
||||
child_dir = Direction_Sequence[dir];
|
||||
for (i = 0; (i < NUM_OFFSPRING) && (work < Local[my_id].Max_Work);
|
||||
i++) {
|
||||
cb = b->children[next_child[i]];
|
||||
if (cb != NULL) {
|
||||
if ((work + cb->subtree_cost) >= Local[my_id].Min_Work)
|
||||
CostZonesHelper(my_id, cb, work, child_dir[i]);
|
||||
work += cb->subtree_cost;
|
||||
}
|
||||
if (i == 2) {
|
||||
if ((work >= Local[my_id].Min_Work)
|
||||
&& (work < Local[my_id].Max_Work))
|
||||
InsertBoxInPartition(my_id, b);
|
||||
work += b->cost;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#undef DOWN
|
||||
#undef UP
|
||||
#undef LEFT
|
||||
#undef RIGHT
|
||||
#undef NUM_DIRECTIONS
|
||||
|
22
splash2/codes/apps/fmm/cost_zones.H
Normal file
22
splash2/codes/apps/fmm/cost_zones.H
Normal file
|
@ -0,0 +1,22 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef _Cost_Zones_H
|
||||
#define _Cost_Zones_H 1
|
||||
|
||||
extern void CostZones(long my_id);
|
||||
|
||||
#endif /* _Cost_Zones_H */
|
81
splash2/codes/apps/fmm/defs.C
Normal file
81
splash2/codes/apps/fmm/defs.C
Normal file
|
@ -0,0 +1,81 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <math.h>
|
||||
#include "defs.h"
|
||||
#include "memory.h"
|
||||
|
||||
long Number_Of_Processors;
|
||||
double Timestep_Dur;
|
||||
real Softening_Param;
|
||||
long Expansion_Terms;
|
||||
|
||||
|
||||
real
|
||||
RoundReal (real val)
|
||||
{
|
||||
double shifter;
|
||||
double frac;
|
||||
long exp;
|
||||
double shifted_frac;
|
||||
double new_frac;
|
||||
double temp;
|
||||
real ret_val;
|
||||
|
||||
shifter = pow((double) 10, (double) REAL_DIG - 2);
|
||||
frac = frexp((double) val, &exp);
|
||||
shifted_frac = frac * shifter;
|
||||
temp = modf(shifted_frac, &new_frac);
|
||||
new_frac /= shifter;
|
||||
ret_val = (real) ldexp(new_frac, exp);
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
PrintComplexNum (complex *c)
|
||||
{
|
||||
if (c->i >= (real) 0.0)
|
||||
printf("%e + %ei", c->r, c->i);
|
||||
else
|
||||
printf("%e - %ei", c->r, -c->i);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
PrintVector (vector *v)
|
||||
{
|
||||
printf("(%10.5f, %10.5f)", v->x, v->y);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
LockedPrint (char *format_str, ...)
|
||||
{
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, format_str);
|
||||
LOCK(G_Memory->io_lock);
|
||||
fflush(stdout);
|
||||
vfprintf(stdout, format_str, ap);
|
||||
fflush(stdout);
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
|
175
splash2/codes/apps/fmm/defs.H
Normal file
175
splash2/codes/apps/fmm/defs.H
Normal file
|
@ -0,0 +1,175 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef _Defs_H
|
||||
#define _Defs_H 1
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
|
||||
/* Define booleans */
|
||||
#ifdef TRUE
|
||||
#undef TRUE
|
||||
#endif
|
||||
#ifdef FALSE
|
||||
#undef FALSE
|
||||
#endif
|
||||
#define NUM_DIMENSIONS 2
|
||||
#define NUM_DIM_POW_2 4
|
||||
|
||||
#undef DBL_MIN
|
||||
#define DBL_MIN 2.2250738585072014e-308 /* min > 0 val of "double" */
|
||||
|
||||
#define TIME_ALL 1 /* non-0 means time each phase within a time step */
|
||||
#define MY_TIMING (Local[my_id].Timing)
|
||||
#define MY_TIME_STEP (Local[my_id].Time_Step)
|
||||
|
||||
#define MAX_REAL DBL_MAX
|
||||
#define MIN_REAL DBL_MIN
|
||||
#define REAL_DIG __DBL_DIG__
|
||||
|
||||
#define MAX_PROCS 64
|
||||
|
||||
/* Defines the maximum depth of the tree */
|
||||
#define MAX_LEVEL 100
|
||||
#define MAX_TIME_STEPS 10
|
||||
|
||||
/*
 * Arithmetic helpers for the complex (.r/.i) and vector (.x/.y) structs.
 *
 * The three-argument forms store into the first argument, which must be an
 * lvalue; the second and third are operands.  For VECTOR_MUL / VECTOR_DIV
 * the third argument is a scalar.  Every argument is parenthesized in the
 * expansion so that expressions such as (*p) or (s + t) can be passed.
 * Arguments may be evaluated more than once, so they must not have side
 * effects.  COMPLEX_MUL and COMPLEX_DIV go through a temporary, so the
 * destination may alias an operand.
 */
#define COMPLEX_ADD(a,b,c) \
   { \
      (a).r = (b).r + (c).r; \
      (a).i = (b).i + (c).i; \
   }

#define COMPLEX_SUB(a,b,c) \
   { \
      (a).r = (b).r - (c).r; \
      (a).i = (b).i - (c).i; \
   }

#define COMPLEX_MUL(a,b,c) \
   { \
      complex _c_temp; \
      \
      _c_temp.r = ((b).r * (c).r) - ((b).i * (c).i); \
      _c_temp.i = ((b).r * (c).i) + ((b).i * (c).r); \
      (a).r = _c_temp.r; \
      (a).i = _c_temp.i; \
   }

#define COMPLEX_DIV(a,b,c) \
   { \
      real _denom; \
      complex _c_temp; \
      \
      _denom = ((real) 1.0) / (((c).r * (c).r) + ((c).i * (c).i)); \
      _c_temp.r = (((b).r * (c).r) + ((b).i * (c).i)) * _denom; \
      _c_temp.i = (((b).i * (c).r) - ((b).r * (c).i)) * _denom; \
      (a).r = _c_temp.r; \
      (a).i = _c_temp.i; \
   }

/* Magnitude of a complex value, as a double. */
#define COMPLEX_ABS(a) \
   sqrt((double) (((a).r * (a).r) + ((a).i * (a).i)))

#define VECTOR_ADD(a,b,c) \
   { \
      (a).x = (b).x + (c).x; \
      (a).y = (b).y + (c).y; \
   }

#define VECTOR_SUB(a,b,c) \
   { \
      (a).x = (b).x - (c).x; \
      (a).y = (b).y - (c).y; \
   }

#define VECTOR_MUL(a,b,c) \
   { \
      (a).x = (b).x * (c); \
      (a).y = (b).y * (c); \
   }

#define VECTOR_DIV(a,b,c) \
   { \
      (a).x = (b).x / (c); \
      (a).y = (b).y / (c); \
   }

#define DOT_PRODUCT(a,b) \
   (((a).x * (b).x) + ((a).y * (b).y))
|
||||
|
||||
/* Per-operation cost constants. */
#define ADD_COST 2
#define MUL_COST 5
#define DIV_COST 19
#define ABS_COST 1

/*
 * Cost formulas for the interaction lists and for a self interaction,
 * as functions of the counts a and b.  Arguments are parenthesized so
 * that an expression can be passed without changing the formula.
 */
#define U_LIST_COST(a,b) (1.06 * 79.2 * (a) * (b))
#define V_LIST_COST(a) (1.08 * ((35.9 * (a) * (a)) + (133.6 * (a))))
#define W_LIST_COST(a,b) (1.11 * 29.2 * (a) * (b))
#define X_LIST_COST(a,b) (1.15 * 56.0 * (a) * (b))
#define SELF_COST(a) (7.0 * 61.4 * (a) * (a))
|
||||
|
||||
/* SWOO: Did I put this here? If so, you don't need it */
|
||||
#define CACHE_SIZE 16 /* should be in bytes */
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
#define PAD_SIZE (PAGE_SIZE / (sizeof(long)))
|
||||
|
||||
typedef enum { FALSE = 0, TRUE = 1 } bool;
|
||||
|
||||
/* These definitions set the precision of the calculations. To use single
|
||||
* precision, simply change double to float and recompile! */
|
||||
typedef double real;
|
||||
|
||||
typedef struct __Complex complex;
|
||||
struct __Complex {
|
||||
real r;
|
||||
real i;
|
||||
};
|
||||
|
||||
typedef struct _Vector vector;
|
||||
struct _Vector {
|
||||
real x;
|
||||
real y;
|
||||
};
|
||||
|
||||
typedef struct _Time_Info time_info;
|
||||
struct _Time_Info {
|
||||
unsigned long construct_time;
|
||||
unsigned long list_time;
|
||||
unsigned long partition_time;
|
||||
unsigned long inter_time;
|
||||
unsigned long pass_time;
|
||||
unsigned long intra_time;
|
||||
unsigned long barrier_time;
|
||||
unsigned long other_time;
|
||||
unsigned long total_time;
|
||||
};
|
||||
|
||||
extern long Number_Of_Processors;
|
||||
extern double Timestep_Dur;
|
||||
extern real Softening_Param;
|
||||
extern long Expansion_Terms;
|
||||
|
||||
extern real RoundReal(real val);
|
||||
extern void PrintComplexNum(complex *c);
|
||||
extern void PrintVector(vector *v);
|
||||
extern void LockedPrint(char *format, ...);
|
||||
|
||||
#endif /* _Defs_H */
|
615
splash2/codes/apps/fmm/fmm.C
Normal file
615
splash2/codes/apps/fmm/fmm.C
Normal file
|
@ -0,0 +1,615 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/*
|
||||
* FMM.C
|
||||
*
|
||||
* This file contains the entry to Greengard's adaptive algorithm.
|
||||
*
|
||||
|
||||
Usage: FMM <options> < inputfile
|
||||
|
||||
Command line options:
|
||||
|
||||
-o : Print out final particle positions.
|
||||
-s : Print out individual processor timing statistics.
|
||||
-h : Print out command line options
|
||||
|
||||
Input file parameter description:
|
||||
There are a total of nine parameters, with parameters
|
||||
three through seven having no default values.
|
||||
|
||||
1) Cluster Type : Particles are distributed either in one cluster,
|
||||
or two interacting clusters of size (# of particles)/ 2.
|
||||
These two options are selected by the strings "one cluster" or
|
||||
"two cluster". The default is for two clusters.
|
||||
2) Distribution Type : Particles are distributed in a cluster
|
||||
either in a spherical uniform distribution, or according to
|
||||
the Plummer model which typically has a large percentage of the
|
||||
particles close to the center of the sphere and fewer particles
|
||||
farther from the center. These two options are selected by
|
||||
the strings "uniform" or "plummer". The default is for a
|
||||
plummer distribution.
|
||||
3) Number Of Particles : Should be an integer greater than 0.
|
||||
4) Precision : A measure of how accurate the calculation should be.
|
||||
A precision of 1e-3 means that the results will be accurate to
|
||||
within three decimal places regardless of the relative magnitude
|
||||
of the positions. The precision should be a real number greater
|
||||
than 0.
|
||||
5) Number of Processors : Should be an integer greater than 0.
|
||||
6) Number of Time Steps : Should be an integer greater than 0.
|
||||
7) Duration of a Time Step : How long each time step lasts.
|
||||
Should be a double greater than 0.
|
||||
8) Softening Parameter : This value sets the minimum distance in
|
||||
each direction that two particles can be separated by. If two
|
||||
particles are closer than this, the distance used for the
|
||||
calculation is changed to the softening parameter. The particle
|
||||
positions themselves are NOT changed. This number should be a
|
||||
real number greater than 0 and defaults to DBL_MIN or FLT_MIN,
|
||||
depending on what type of data is being used.
|
||||
9) Partitioning Scheme : Sets which type of partitioning scheme
|
||||
is used. There are currently two : "cost zones" and "orb".
|
||||
The default is cost zones.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "defs.h"
|
||||
#include "memory.h"
|
||||
#include "particle.h"
|
||||
#include "box.h"
|
||||
#include "partition_grid.h"
|
||||
#include "cost_zones.h"
|
||||
#include "construct_grid.h"
|
||||
#include "interactions.h"
|
||||
|
||||
#define BASE ((((double) 4) - sqrt((double) 2)) / sqrt((double) 2))
|
||||
#define MAX_LINE_SIZE 100
|
||||
/* OCCUPANCY * maximum particles per box = avg number of particles per box */
|
||||
#define OCCUPANCY ((MAX_PARTICLES_PER_BOX > 5) ? .375 : .750)
|
||||
/* Some processors will be given more than the average number of particles.
|
||||
* PDF (Particle Distribution Factor) is the ratio of the maximum to the avg */
|
||||
#define PDF 4.0
|
||||
/* A nonuniform distribution will require more boxes than a uniform
|
||||
* distribution of the same size. TOLERANCE is used to account for this */
|
||||
#define TOLERANCE 1.5
|
||||
/* Same as PDF, but for boxes */
|
||||
/* define BDF (((Total_Particles/Number_Of_Processors) > 128) ? 2.0 : 3.0)*/
|
||||
#define BDF (((Total_Particles/Number_Of_Processors) > 128) ? 4.0 : 8.0)
|
||||
|
||||
static partition_alg Partition_Flag;
|
||||
static real Precision;
|
||||
static long Time_Steps;
|
||||
static cluster_type Cluster;
|
||||
static model_type Model;
|
||||
long do_stats = 0;
|
||||
long do_output = 0;
|
||||
unsigned long starttime;
|
||||
unsigned long endtime;
|
||||
|
||||
void ParallelExecute(void);
|
||||
void StepSimulation(long my_id, time_info *local_time, long time_all);
|
||||
void PartitionGrid(long my_id, time_info *local_time, long time_all);
|
||||
void GetArguments(void);
|
||||
void PrintTimes(void);
|
||||
void Help(void);
|
||||
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
long c;
|
||||
extern char *optarg;
|
||||
|
||||
CLOCK(starttime);
|
||||
|
||||
while ((c = getopt(argc, argv, "osh")) != -1) {
|
||||
switch(c) {
|
||||
case 'o': do_output = 1; break;
|
||||
case 's': do_stats = 1; break;
|
||||
case 'h': Help(); break;
|
||||
}
|
||||
}
|
||||
|
||||
MAIN_INITENV(,40000000);
|
||||
|
||||
GetArguments();
|
||||
InitGlobalMemory();
|
||||
InitExpTables();
|
||||
CreateDistribution(Cluster, Model);
|
||||
|
||||
/* for (i = 1; i < Number_Of_Processors; i++) {
|
||||
CREATE(ParallelExecute);
|
||||
}
|
||||
ParallelExecute();
|
||||
WAIT_FOR_END(Number_Of_Processors - 1);*/
|
||||
CREATE(ParallelExecute, Number_Of_Processors);
|
||||
WAIT_FOR_END(Number_Of_Processors);
|
||||
|
||||
printf("Finished FMM\n");
|
||||
PrintTimes();
|
||||
if (do_output) {
|
||||
PrintAllParticles();
|
||||
}
|
||||
MAIN_END;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ParallelExecute ()
|
||||
{
|
||||
long my_id;
|
||||
long num_boxes;
|
||||
unsigned long start, finish = 0;
|
||||
time_info *local_time;
|
||||
long time_all = 0;
|
||||
time_info *timing;
|
||||
unsigned long local_init_done = 0;
|
||||
|
||||
BARINCLUDE(G_Memory->synch);
|
||||
local_time = (time_info *) malloc(sizeof(struct _Time_Info) * MAX_TIME_STEPS);
|
||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
||||
LOCK(G_Memory->count_lock);
|
||||
my_id = G_Memory->id;
|
||||
G_Memory->id++;
|
||||
UNLOCK(G_Memory->count_lock);
|
||||
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
|
||||
processors to avoid migration */
|
||||
|
||||
if (my_id == 0) {
|
||||
time_all = 1;
|
||||
} else if (do_stats) {
|
||||
time_all = 1;
|
||||
}
|
||||
|
||||
if (my_id == 0) {
|
||||
/* have to allocate extra space since it will construct the grid by
|
||||
* itself for the first time step */
|
||||
CreateParticleList(my_id, Total_Particles);
|
||||
InitParticleList(my_id, Total_Particles, 0);
|
||||
}
|
||||
else {
|
||||
CreateParticleList(my_id, ((Total_Particles * PDF)
|
||||
/ Number_Of_Processors));
|
||||
InitParticleList(my_id, 0, 0);
|
||||
}
|
||||
num_boxes = 1.333 * (Total_Particles / (OCCUPANCY * MAX_PARTICLES_PER_BOX));
|
||||
if (my_id == 0)
|
||||
CreateBoxes(my_id, TOLERANCE * num_boxes);
|
||||
else
|
||||
CreateBoxes(my_id, TOLERANCE * num_boxes * BDF / Number_Of_Processors);
|
||||
|
||||
if (my_id == 0) {
|
||||
LockedPrint("Starting FMM with %d processor%s\n", Number_Of_Processors,
|
||||
(Number_Of_Processors == 1) ? "" : "s");
|
||||
}
|
||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
||||
Local[my_id].Time = 0.0;
|
||||
for (MY_TIME_STEP = 0; MY_TIME_STEP < Time_Steps; MY_TIME_STEP++) {
|
||||
|
||||
if (MY_TIME_STEP == 2) {
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might reset the
|
||||
statistics that one is measuring about the parallel execution */
|
||||
}
|
||||
|
||||
if (MY_TIME_STEP == 2) {
|
||||
if (do_stats || my_id == 0) {
|
||||
CLOCK(local_init_done);
|
||||
}
|
||||
}
|
||||
|
||||
if (MY_TIME_STEP == 0) {
|
||||
CLOCK(start);
|
||||
}
|
||||
else
|
||||
start = finish;
|
||||
ConstructGrid(my_id,local_time,time_all);
|
||||
ConstructLists(my_id,local_time,time_all);
|
||||
PartitionGrid(my_id,local_time,time_all);
|
||||
StepSimulation(my_id,local_time,time_all);
|
||||
DestroyGrid(my_id,local_time,time_all);
|
||||
CLOCK(finish);
|
||||
Local[my_id].Time += Timestep_Dur;
|
||||
MY_TIMING[MY_TIME_STEP].total_time = finish - start;
|
||||
}
|
||||
if (my_id == 0) {
|
||||
CLOCK(endtime);
|
||||
}
|
||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
||||
for (MY_TIME_STEP = 0; MY_TIME_STEP < Time_Steps; MY_TIME_STEP++) {
|
||||
timing = &(MY_TIMING[MY_TIME_STEP]);
|
||||
timing->other_time = local_time[MY_TIME_STEP].other_time;
|
||||
timing->construct_time = local_time[MY_TIME_STEP].construct_time;
|
||||
timing->list_time = local_time[MY_TIME_STEP].list_time;
|
||||
timing->partition_time = local_time[MY_TIME_STEP].partition_time;
|
||||
timing->pass_time = local_time[MY_TIME_STEP].pass_time;
|
||||
timing->inter_time = local_time[MY_TIME_STEP].inter_time;
|
||||
timing->barrier_time = local_time[MY_TIME_STEP].barrier_time;
|
||||
timing->intra_time = local_time[MY_TIME_STEP].intra_time;
|
||||
}
|
||||
Local[my_id].init_done_times = local_init_done;
|
||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
PartitionGrid (long my_id, time_info *local_time, long time_all)
|
||||
{
|
||||
unsigned long start = 0, finish;
|
||||
|
||||
if (time_all)
|
||||
CLOCK(start);
|
||||
if (Partition_Flag == COST_ZONES)
|
||||
CostZones(my_id);
|
||||
if (time_all) {
|
||||
CLOCK(finish);
|
||||
local_time[MY_TIME_STEP].partition_time = finish - start;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
StepSimulation (long my_id, time_info *local_time, long time_all)
|
||||
{
|
||||
unsigned long start, finish;
|
||||
unsigned long upward_end, interaction_end, downward_end, barrier_end;
|
||||
|
||||
if (time_all)
|
||||
CLOCK(start);
|
||||
PartitionIterate(my_id, UpwardPass, BOTTOM);
|
||||
if (time_all)
|
||||
CLOCK(upward_end);
|
||||
PartitionIterate(my_id, ComputeInteractions, BOTTOM);
|
||||
if (time_all)
|
||||
CLOCK(interaction_end);
|
||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
||||
if (time_all)
|
||||
CLOCK(barrier_end);
|
||||
PartitionIterate(my_id, DownwardPass, TOP);
|
||||
if (time_all)
|
||||
CLOCK(downward_end);
|
||||
PartitionIterate(my_id, ComputeParticlePositions, CHILDREN);
|
||||
if (time_all)
|
||||
CLOCK(finish);
|
||||
|
||||
if (time_all) {
|
||||
local_time[MY_TIME_STEP].pass_time = upward_end - start;
|
||||
local_time[MY_TIME_STEP].inter_time = interaction_end - upward_end;
|
||||
local_time[MY_TIME_STEP].barrier_time = barrier_end - interaction_end;
|
||||
local_time[MY_TIME_STEP].pass_time += downward_end - barrier_end;
|
||||
local_time[MY_TIME_STEP].intra_time = finish - downward_end;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
GetArguments ()
|
||||
{
|
||||
char *input;
|
||||
|
||||
input = (char *) malloc(MAX_LINE_SIZE * sizeof(char));
|
||||
if (input == NULL) {
|
||||
fprintf(stderr, "ERROR\n");
|
||||
exit(-1);
|
||||
}
|
||||
gets(input);
|
||||
if (strcmp(input, "one cluster") == 0)
|
||||
Cluster = ONE_CLUSTER;
|
||||
else {
|
||||
if ((*input == '\0') || (strcmp(input, "two cluster") == 0))
|
||||
Cluster = TWO_CLUSTER;
|
||||
else {
|
||||
fprintf(stderr, "ERROR: The only cluster types available are ");
|
||||
fprintf(stderr, "\"one cluster\" or \"two cluster\".\n");
|
||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
gets(input);
|
||||
if (strcmp(input, "uniform") == 0)
|
||||
Model = UNIFORM;
|
||||
else {
|
||||
if ((*input == '\0') || (strcmp(input, "plummer") == 0))
|
||||
Model = PLUMMER;
|
||||
else {
|
||||
fprintf(stderr, "ERROR: The only distributions available are ");
|
||||
fprintf(stderr, "\"uniform\" or \"plummer\".\n");
|
||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
Total_Particles = atoi(gets(input));
|
||||
if (Total_Particles <= 0) {
|
||||
fprintf(stderr, "ERROR: The number of particles should be an int ");
|
||||
fprintf(stderr, "greater than 0.\n");
|
||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
Precision = atof(gets(input));
|
||||
if (Precision == 0.0) {
|
||||
fprintf(stderr, "ERROR: The precision has no default value.\n");
|
||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
/* Determine number of multipole expansion terms needed for specified
|
||||
* precision and flag an error if it is too precise */
|
||||
Expansion_Terms = (long) ceil(-(log(Precision) / log(BASE)));
|
||||
if (Expansion_Terms > MAX_EXPANSION_TERMS) {
|
||||
fprintf(stderr, "ERROR: %g (%ld terms) is too great a precision.\n", Precision, Expansion_Terms);
|
||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
Number_Of_Processors = atoi(gets(input));
|
||||
if (Number_Of_Processors == 0) {
|
||||
fprintf(stderr, "ERROR: The Number_Of_Processors has no default.\n");
|
||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
if (Number_Of_Processors < 0) {
|
||||
fprintf(stderr, "ERROR: Number of processors should be an int greater ");
|
||||
fprintf(stderr, "than 0.\n");
|
||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
Time_Steps = atoi(gets(input));
|
||||
if (Time_Steps == 0) {
|
||||
fprintf(stderr, "ERROR: The number of time steps has no default.\n");
|
||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
if (Time_Steps < 0) {
|
||||
fprintf(stderr, "ERROR: The number of time steps should be an int ");
|
||||
fprintf(stderr, "greater than 0.\n");
|
||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
Timestep_Dur = atof(gets(input));
|
||||
if (Timestep_Dur == 0.0) {
|
||||
fprintf(stderr, "ERROR: The duration of a time step has no default ");
|
||||
fprintf(stderr, "value.\n If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
if (Timestep_Dur < 0) {
|
||||
fprintf(stderr, "ERROR: The duration of a time step should be a ");
|
||||
fprintf(stderr, "double greater than 0.\n");
|
||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
Softening_Param = atof(gets(input));
|
||||
if (Softening_Param == 0.0)
|
||||
Softening_Param = MIN_REAL;
|
||||
if (Softening_Param < 0) {
|
||||
fprintf(stderr, "ERROR: The softening parameter should be a double ");
|
||||
fprintf(stderr, "greater than 0.\n");
|
||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
gets(input);
|
||||
if ((*input == '\0') || (strcmp(input, "cost zones") == 0))
|
||||
Partition_Flag = COST_ZONES;
|
||||
else {
|
||||
if (strcmp(input, "orb") == 0)
|
||||
Partition_Flag = ORB;
|
||||
else {
|
||||
fprintf(stderr, "ERROR: The only partitioning schemes available ");
|
||||
fprintf(stderr, "are \"cost zones\" \n\t or \"orb\".\n");
|
||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
PrintTimes ()
|
||||
{
|
||||
long i, j;
|
||||
time_info *timing;
|
||||
FILE *fp;
|
||||
double t_total_time = 0;
|
||||
double t_tree_time = 0;
|
||||
double t_list_time = 0;
|
||||
double t_part_time = 0;
|
||||
double t_pass_time = 0;
|
||||
double t_inter_time = 0;
|
||||
double t_bar_time = 0;
|
||||
double t_intra_time = 0;
|
||||
double t_other_time = 0;
|
||||
double total_time;
|
||||
double tree_time;
|
||||
double list_time;
|
||||
double part_time;
|
||||
double pass_time;
|
||||
double inter_time;
|
||||
double bar_time;
|
||||
double intra_time;
|
||||
double other_time;
|
||||
double overall_total = 0;
|
||||
long P;
|
||||
long init_done;
|
||||
|
||||
if ((fp = fopen("times", "w")) == NULL) {
|
||||
fprintf(stderr, "Error opening output file\n");
|
||||
fflush(stderr);
|
||||
exit(-1);
|
||||
}
|
||||
fprintf(fp, "TIMING:\n");
|
||||
fprintf(fp, "%ld\t%ld\t%.2e\t%ld\n", Number_Of_Processors, Total_Particles, Precision, Time_Steps);
|
||||
for (i = 0; i < Time_Steps; i++) {
|
||||
fprintf(fp, "Time Step %ld\n", i);
|
||||
for (j = 0; j < Number_Of_Processors; j++) {
|
||||
timing = &(Local[j].Timing[i]);
|
||||
fprintf(fp, "Processor %ld\n", j);
|
||||
fprintf(fp, "\tTotal Time = %lu\n", timing->total_time);
|
||||
if (do_stats) {
|
||||
fprintf(fp, "\tTree Construction Time = %lu\n",
|
||||
timing->construct_time);
|
||||
fprintf(fp, "\tList Construction Time = %lu\n", timing->list_time);
|
||||
fprintf(fp, "\tPartition Time = %lu\n", timing->partition_time);
|
||||
fprintf(fp, "\tTree Pass Time = %lu\n", timing->pass_time);
|
||||
fprintf(fp, "\tInter Particle Time = %lu\n", timing->inter_time);
|
||||
fprintf(fp, "\tBarrier Time = %lu\n", timing->barrier_time);
|
||||
fprintf(fp, "\tIntra Particle Time = %lu\n", timing->intra_time);
|
||||
fprintf(fp, "\tOther Time = %lu\n", timing->other_time);
|
||||
}
|
||||
fflush(fp);
|
||||
}
|
||||
}
|
||||
fprintf(fp, "END\n");
|
||||
fclose(fp);
|
||||
|
||||
printf(" PROCESS STATISTICS\n");
|
||||
printf(" Track Tree List Part Pass Inter Bar Intra Other\n");
|
||||
printf(" Proc Time Time Time Time Time Time Time Time Time\n");
|
||||
total_time = tree_time = list_time = part_time = pass_time =
|
||||
inter_time = bar_time = intra_time = other_time = 0;
|
||||
for (i = 2; i < Time_Steps; i++) {
|
||||
timing = &(Local[0].Timing[i]);
|
||||
total_time += timing->total_time;
|
||||
tree_time += timing->construct_time;
|
||||
list_time += timing->list_time;
|
||||
part_time += timing->partition_time;
|
||||
pass_time += timing->pass_time;
|
||||
inter_time += timing->inter_time;
|
||||
bar_time += timing->barrier_time;
|
||||
intra_time += timing->intra_time;
|
||||
other_time += timing->other_time;
|
||||
}
|
||||
printf(" %4d %12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f\n",
|
||||
0,total_time,tree_time,list_time,part_time,pass_time,
|
||||
inter_time,bar_time,intra_time,other_time);
|
||||
t_total_time += total_time;
|
||||
t_tree_time += tree_time;
|
||||
t_list_time += list_time;
|
||||
t_part_time += part_time;
|
||||
t_pass_time += pass_time;
|
||||
t_inter_time += inter_time;
|
||||
t_bar_time += bar_time;
|
||||
t_intra_time += intra_time;
|
||||
t_other_time += other_time;
|
||||
if (total_time > overall_total) {
|
||||
overall_total = total_time;
|
||||
}
|
||||
for (j = 1; j < Number_Of_Processors; j++) {
|
||||
total_time = tree_time = list_time = part_time = pass_time =
|
||||
inter_time = bar_time = intra_time = other_time = 0;
|
||||
for (i = 2; i < Time_Steps; i++) {
|
||||
timing = &(Local[j].Timing[i]);
|
||||
total_time += timing->total_time;
|
||||
tree_time += timing->construct_time;
|
||||
list_time += timing->list_time;
|
||||
part_time += timing->partition_time;
|
||||
pass_time += timing->pass_time;
|
||||
inter_time += timing->inter_time;
|
||||
bar_time += timing->barrier_time;
|
||||
intra_time += timing->intra_time;
|
||||
other_time += timing->other_time;
|
||||
}
|
||||
if (do_stats) {
|
||||
printf(" %4ld %12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f\n",
|
||||
j,total_time,tree_time,list_time,part_time,pass_time,
|
||||
inter_time,bar_time,intra_time,other_time);
|
||||
}
|
||||
t_total_time += total_time;
|
||||
t_tree_time += tree_time;
|
||||
t_list_time += list_time;
|
||||
t_part_time += part_time;
|
||||
t_pass_time += pass_time;
|
||||
t_inter_time += inter_time;
|
||||
t_bar_time += bar_time;
|
||||
t_intra_time += intra_time;
|
||||
t_other_time += other_time;
|
||||
if (total_time > overall_total) {
|
||||
overall_total = total_time;
|
||||
}
|
||||
}
|
||||
if (do_stats) {
|
||||
P = Number_Of_Processors;
|
||||
printf(" Avg %12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f\n",
|
||||
t_total_time/P,t_tree_time/P,t_list_time/P,t_part_time/P,
|
||||
t_pass_time/P,t_inter_time/P,t_bar_time/P,t_intra_time/P,
|
||||
t_other_time/P);
|
||||
}
|
||||
printf("\n");
|
||||
if (Time_Steps > 2) {
|
||||
init_done = Local[0].init_done_times;
|
||||
if (do_stats) {
|
||||
for (j = 1; j < Number_Of_Processors; j++) {
|
||||
if (Local[j].init_done_times > init_done) {
|
||||
init_done = Local[j].init_done_times;
|
||||
}
|
||||
}
|
||||
}
|
||||
printf(" TIMING INFORMATION\n");
|
||||
printf("Start time : %16lu\n", starttime);
|
||||
printf("Initialization finish time : %16lu\n", init_done);
|
||||
printf("Overall finish time : %16lu\n", endtime);
|
||||
printf("Total time with initialization : %16lu\n", endtime - starttime);
|
||||
printf("Total time without initialization : %16lu\n", (long) (overall_total));
|
||||
printf("\n");
|
||||
|
||||
printf("Total time for steps %ld to %ld : %12.0f\n", 3L, Time_Steps, overall_total);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Help: print the command-line usage and a description of the nine input
 * parameters to stdout, then exit with status 0.
 */
void
Help ()
{
   printf("Usage: FMM <options> < inputfile\n\n");
   printf("options:\n");
   printf(" -o : Print out final particle positions.\n");
   printf(" -s : Print out individual processor timing statistics.\n");
   printf(" -h : Print out command line options\n");
   printf("\n");
   printf("Input parameter descriptions:\n");
   printf(" There are nine parameters, and parameters three through\n");
   printf(" seven have no default values.\n");
   printf("1) Cluster Type : Distribute particles in one cluster\n");

   printf(" (\"one cluster\") or two interacting clusters (\"two cluster\")\n");
   printf(" Default is two cluster.\n");
   printf("2) Distribution Type : Distribute particles in either a\n");
   printf(" uniform spherical distribution (\"uniform\"), or in a\n");
   printf(" Plummer model (\"plummer\"). Default is plummer.\n");
   printf("3) Number Of Particles : Integer greater than 0.\n");
   printf("4) Precision : Precision of results. Should be a double.\n");
   printf("5) Number of Processors : Integer greater than 0.\n");
   printf("6) Number of Time Steps : Integer greater than 0.\n");
   printf("7) Time Step Duration : Double greater than 0.\n");
   printf("8) Softening Parameter : Real number greater than 0.\n");
   printf(" Default is DBL_MIN or FLT_MIN.\n");
   printf("9) Partitioning Scheme : \"cost zones\" or \"orb\".\n");
   printf(" Default is cost zones.\n");
   exit(0);
}
|
||||
|
||||
#undef MAX_LINE_SIZE
|
||||
#undef BASE
|
9
splash2/codes/apps/fmm/inputs/input.16384
Normal file
9
splash2/codes/apps/fmm/inputs/input.16384
Normal file
|
@ -0,0 +1,9 @@
|
|||
two cluster
|
||||
plummer
|
||||
16384
|
||||
1e-6
|
||||
1
|
||||
5
|
||||
.025
|
||||
0.0
|
||||
cost zones
|
9
splash2/codes/apps/fmm/inputs/input.2048
Normal file
9
splash2/codes/apps/fmm/inputs/input.2048
Normal file
|
@ -0,0 +1,9 @@
|
|||
two cluster
|
||||
plummer
|
||||
2048
|
||||
1e-6
|
||||
1
|
||||
5
|
||||
.025
|
||||
0.0
|
||||
cost zones
|
9
splash2/codes/apps/fmm/inputs/input.256
Normal file
9
splash2/codes/apps/fmm/inputs/input.256
Normal file
|
@ -0,0 +1,9 @@
|
|||
two cluster
|
||||
plummer
|
||||
256
|
||||
1e-6
|
||||
1
|
||||
5
|
||||
.025
|
||||
0.0
|
||||
cost zones
|
664
splash2/codes/apps/fmm/interactions.C
Normal file
664
splash2/codes/apps/fmm/interactions.C
Normal file
|
@ -0,0 +1,664 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "defs.h"
|
||||
#include "memory.h"
|
||||
#include "particle.h"
|
||||
#include "box.h"
|
||||
#include "partition_grid.h"
|
||||
#include "interactions.h"
|
||||
|
||||
static real Inv[MAX_EXPANSION_TERMS + 1];
|
||||
static real OverInc[MAX_EXPANSION_TERMS + 1];
|
||||
static real C[2 * MAX_EXPANSION_TERMS][2 * MAX_EXPANSION_TERMS];
|
||||
static complex One;
|
||||
static complex Zero;
|
||||
|
||||
void InitExp(box *b);
|
||||
void ComputeMPExp(box *b);
|
||||
void ShiftMPExp(box *cb, box *pb);
|
||||
void UListInteraction(long my_id, box *b1, box *b2);
|
||||
void VListInteraction(long my_id, box *source_box, box *dest_box);
|
||||
void WAndXListInteractions(long my_id, box *b1, box *b2);
|
||||
void WListInteraction(box *source_box, box *dest_box);
|
||||
void XListInteraction(box *source_box, box *dest_box);
|
||||
void ComputeSelfInteraction(box *b);
|
||||
void ShiftLocalExp(box *pb, box *cb);
|
||||
void EvaluateLocalExp(box *b);
|
||||
|
||||
|
||||
void
|
||||
InitExpTables ()
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
|
||||
for (i = 1; i < MAX_EXPANSION_TERMS + 1; i++) {
|
||||
Inv[i] = ((real) 1) / (real) i;
|
||||
OverInc[i] = ((real) i) / ((real) i + (real) 1);
|
||||
}
|
||||
C[0][0] = (real) 1.0;
|
||||
for (i = 1; i < (2 * MAX_EXPANSION_TERMS); i++) {
|
||||
C[i][0] = (real) 1.0;
|
||||
C[i][1] = (real) i;
|
||||
C[i - 1][i] = (real) 0.0;
|
||||
for (j = 2; j <= i; j++)
|
||||
C[i][j] = C[i - 1][j] + C[i - 1][j - 1];
|
||||
}
|
||||
|
||||
One.r = (real) 1.0;
|
||||
One.i = (real) 0.0;
|
||||
Zero.r = (real) 0.0;
|
||||
Zero.i = (real) 0.0;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
PrintExpTables ()
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
|
||||
printf("Table for the functions f(i) = 1 / i and g(i) = i / (i + 1)\n");
|
||||
printf("i\t\tf(i)\t\tg(i)\t\t\n");
|
||||
for (i = 1; i < MAX_EXPANSION_TERMS; i++)
|
||||
printf("%ld\t\t%e\t%f\t\n", i, Inv[i], OverInc[i]);
|
||||
printf("\n\nTable for the function h(i,j) = i choose j\n");
|
||||
printf("i\tj\th(i,j)\n");
|
||||
for (i = 0; i < (2 * MAX_EXPANSION_TERMS); i++) {
|
||||
for (j = 0; j <= i; j++)
|
||||
printf("%ld\t%ld\t%g\n", i, j, C[i][j]);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
UpwardPass (long my_id, box *b)
|
||||
{
|
||||
InitExp(b);
|
||||
if (b->type == CHILDLESS) {
|
||||
ComputeMPExp(b);
|
||||
ALOCK(G_Memory->lock_array, b->exp_lock_index);
|
||||
b->interaction_synch = 1;
|
||||
AULOCK(G_Memory->lock_array, b->exp_lock_index);
|
||||
}
|
||||
else {
|
||||
while (b->interaction_synch != b->num_children) {
|
||||
/* wait */;
|
||||
}
|
||||
}
|
||||
if (b->parent != NULL) {
|
||||
ShiftMPExp(b, b->parent);
|
||||
ALOCK(G_Memory->lock_array, b->parent->exp_lock_index);
|
||||
b->parent->interaction_synch += 1;
|
||||
AULOCK(G_Memory->lock_array, b->parent->exp_lock_index);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ComputeInteractions (long my_id, box *b)
|
||||
{
|
||||
b->cost = 0;
|
||||
if (b->type == CHILDLESS) {
|
||||
ComputeSelfInteraction(b);
|
||||
ListIterate(my_id, b, b->u_list, b->num_u_list, UListInteraction);
|
||||
ListIterate(my_id, b, b->w_list, b->num_w_list, WAndXListInteractions);
|
||||
}
|
||||
ListIterate(my_id, b, b->v_list, b->num_v_list, VListInteraction);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
DownwardPass (long my_id, box *b)
|
||||
{
|
||||
if (b->parent != NULL) {
|
||||
while (b->parent->interaction_synch != 0) {
|
||||
/* wait */;
|
||||
}
|
||||
ShiftLocalExp(b->parent, b);
|
||||
}
|
||||
if (b->type == CHILDLESS) {
|
||||
EvaluateLocalExp(b);
|
||||
b->interaction_synch = 0;
|
||||
}
|
||||
else {
|
||||
ALOCK(G_Memory->lock_array, b->exp_lock_index);
|
||||
b->interaction_synch = 0;
|
||||
AULOCK(G_Memory->lock_array, b->exp_lock_index);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ComputeParticlePositions (long my_id, box *b)
|
||||
{
|
||||
particle *p;
|
||||
vector force;
|
||||
vector new_acc;
|
||||
vector delta_acc;
|
||||
vector delta_vel;
|
||||
vector avg_vel;
|
||||
vector delta_pos;
|
||||
long i;
|
||||
|
||||
for (i = 0; i < b->num_particles; i++) {
|
||||
p = b->particles[i];
|
||||
force.x = p->field.r * p->charge;
|
||||
force.y = p->field.i * p->charge;
|
||||
VECTOR_DIV(new_acc, force, p->mass);
|
||||
if (Local[my_id].Time_Step != 0) {
|
||||
VECTOR_SUB(delta_acc, new_acc, (p->acc));
|
||||
VECTOR_MUL(delta_vel, delta_acc, ((real) Timestep_Dur) / (real) 2.0);
|
||||
VECTOR_ADD((p->vel), (p->vel), delta_vel);
|
||||
}
|
||||
p->acc.x = new_acc.x;
|
||||
p->acc.y = new_acc.y;
|
||||
VECTOR_MUL(delta_vel, (p->acc), ((real) Timestep_Dur) / (real) 2.0);
|
||||
VECTOR_ADD(avg_vel, (p->vel), delta_vel);
|
||||
VECTOR_MUL(delta_pos, avg_vel, (real) Timestep_Dur);
|
||||
VECTOR_ADD((p->vel), avg_vel, delta_vel);
|
||||
VECTOR_ADD((p->pos), (p->pos), delta_pos);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
InitExp (box *b)
|
||||
{
|
||||
long i;
|
||||
|
||||
for (i = 0; i < Expansion_Terms; i++) {
|
||||
b->mp_expansion[i].r = 0.0;
|
||||
b->mp_expansion[i].i = 0.0;
|
||||
b->local_expansion[i].r = 0.0;
|
||||
b->local_expansion[i].i = 0.0;
|
||||
b->x_expansion[i].r = 0.0;
|
||||
b->x_expansion[i].i = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ComputeMPExp (box *b)
|
||||
*
|
||||
* Args : a box, b.
|
||||
*
|
||||
* Returns : nothing.
|
||||
*
|
||||
* Side Effects : Computes and sets the multipole expansion array.
|
||||
*
|
||||
* Comments : The first term (a0) in the expansion is simply the sum of the
|
||||
* charges in the box. This procedure first computes the distances between
|
||||
* the particles in the box and the box's center. At the same time, a0 is
|
||||
* computed. Then the remaining terms are calculated by theorem 2.1.1 in
|
||||
* Greengard's thesis.
|
||||
*
|
||||
*/
|
||||
void
|
||||
ComputeMPExp (box *b)
|
||||
{
|
||||
particle *p;
|
||||
complex charge;
|
||||
complex box_pos;
|
||||
complex particle_pos;
|
||||
complex z0;
|
||||
complex z0_pow_n;
|
||||
complex temp;
|
||||
complex result_exp[MAX_EXPANSION_TERMS];
|
||||
long i;
|
||||
long j;
|
||||
|
||||
box_pos.r = b->x_center;
|
||||
box_pos.i = b->y_center;
|
||||
for (i = 0; i < Expansion_Terms; i++) {
|
||||
result_exp[i].r = (real) 0.0;
|
||||
result_exp[i].i = (real) 0.0;
|
||||
}
|
||||
for (i = 0; i < b->num_particles; i++) {
|
||||
p = b->particles[i];
|
||||
particle_pos.r = p->pos.x;
|
||||
particle_pos.i = p->pos.y;
|
||||
charge.r = p->charge;
|
||||
charge.i = (real) 0.0;
|
||||
COMPLEX_SUB(z0, particle_pos, box_pos);
|
||||
z0_pow_n.r = One.r;
|
||||
z0_pow_n.i = One.i;
|
||||
for (j = 1; j < Expansion_Terms; j++) {
|
||||
COMPLEX_MUL(temp, z0_pow_n, charge);
|
||||
COMPLEX_ADD(result_exp[j], result_exp[j], temp);
|
||||
COMPLEX_MUL(z0_pow_n, z0_pow_n, z0);
|
||||
}
|
||||
}
|
||||
ALOCK(G_Memory->lock_array, b->exp_lock_index);
|
||||
for (i = 0; i < Expansion_Terms; i++) {
|
||||
b->mp_expansion[i].r = result_exp[i].r;
|
||||
b->mp_expansion[i].i = result_exp[i].i;
|
||||
}
|
||||
AULOCK(G_Memory->lock_array, b->exp_lock_index);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ShiftMPExp (box *cb, box *pb)
|
||||
{
|
||||
complex z0;
|
||||
complex z0_inv;
|
||||
complex z0_pow_n;
|
||||
complex z0_pow_minus_n;
|
||||
complex temp_exp[MAX_EXPANSION_TERMS];
|
||||
complex result_exp[MAX_EXPANSION_TERMS];
|
||||
complex child_pos;
|
||||
complex parent_pos;
|
||||
complex temp;
|
||||
long i;
|
||||
long j;
|
||||
|
||||
child_pos.r = cb->x_center;
|
||||
child_pos.i = cb->y_center;
|
||||
parent_pos.r = pb->x_center;
|
||||
parent_pos.i = pb->y_center;
|
||||
COMPLEX_SUB(z0, child_pos, parent_pos);
|
||||
COMPLEX_DIV(z0_inv, One, z0);
|
||||
z0_pow_n.r = One.r;
|
||||
z0_pow_n.i = One.i;
|
||||
z0_pow_minus_n.r = One.r;
|
||||
z0_pow_minus_n.i = One.i;
|
||||
result_exp[0].r = cb->mp_expansion[0].r;
|
||||
result_exp[0].i = cb->mp_expansion[0].i;
|
||||
for (i = 1; i < Expansion_Terms; i++) {
|
||||
result_exp[i].r = (real) 0.0;
|
||||
result_exp[i].i = (real) 0.0;
|
||||
COMPLEX_MUL(z0_pow_minus_n, z0_pow_minus_n, z0_inv);
|
||||
COMPLEX_MUL(temp_exp[i], z0_pow_minus_n, cb->mp_expansion[i]);
|
||||
for (j = 1; j <= i; j++) {
|
||||
temp.r = C[i - 1][j - 1];
|
||||
temp.i = (real) 0.0;
|
||||
COMPLEX_MUL(temp, temp, temp_exp[j]);
|
||||
COMPLEX_ADD(result_exp[i], result_exp[i], temp);
|
||||
}
|
||||
temp.r = Inv[i];
|
||||
temp.i = (real) 0.0;
|
||||
COMPLEX_MUL(temp, temp, cb->mp_expansion[0]);
|
||||
COMPLEX_SUB(temp, result_exp[i], temp);
|
||||
COMPLEX_MUL(z0_pow_n, z0_pow_n, z0);
|
||||
COMPLEX_MUL(result_exp[i], temp, z0_pow_n);
|
||||
}
|
||||
ALOCK(G_Memory->lock_array, pb->exp_lock_index);
|
||||
for (i = 0; i < Expansion_Terms; i++) {
|
||||
COMPLEX_ADD((pb->mp_expansion[i]), (pb->mp_expansion[i]), result_exp[i]);
|
||||
}
|
||||
AULOCK(G_Memory->lock_array, pb->exp_lock_index);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
UListInteraction (long my_id, box *source_box, box *dest_box)
|
||||
{
|
||||
complex result;
|
||||
complex temp_vector;
|
||||
complex temp_charge;
|
||||
complex temp_result;
|
||||
real denom;
|
||||
real x_sep;
|
||||
real y_sep;
|
||||
real dest_x;
|
||||
real dest_y;
|
||||
long i;
|
||||
long j;
|
||||
|
||||
for (i = 0; i < dest_box->num_particles; i++) {
|
||||
result.r = (real) 0.0;
|
||||
result.i = (real) 0.0;
|
||||
dest_x = dest_box->particles[i]->pos.x;
|
||||
dest_y = dest_box->particles[i]->pos.y;
|
||||
for (j = 0; j < source_box->num_particles; j++) {
|
||||
x_sep = source_box->particles[j]->pos.x - dest_x;
|
||||
y_sep = source_box->particles[j]->pos.y - dest_y;
|
||||
denom = ((real) 1.0) / ((x_sep * x_sep) + (y_sep * y_sep));
|
||||
temp_vector.r = x_sep * denom;
|
||||
temp_vector.i = y_sep * denom;
|
||||
temp_charge.r = source_box->particles[j]->charge;
|
||||
temp_charge.i = (real) 0.0;
|
||||
COMPLEX_MUL(temp_result, temp_vector, temp_charge);
|
||||
COMPLEX_SUB(result, result, temp_result);
|
||||
}
|
||||
result.i = -result.i;
|
||||
COMPLEX_ADD((dest_box->particles[i]->field),
|
||||
(dest_box->particles[i]->field), result);
|
||||
}
|
||||
|
||||
dest_box->cost += U_LIST_COST(source_box->num_particles,
|
||||
dest_box->num_particles);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
VListInteraction (long my_id, box *source_box, box *dest_box)
|
||||
{
|
||||
complex z0;
|
||||
complex z0_inv;
|
||||
complex z0_pow_minus_n[MAX_EXPANSION_TERMS];
|
||||
complex temp_exp[MAX_EXPANSION_TERMS];
|
||||
complex result_exp;
|
||||
complex source_pos;
|
||||
complex dest_pos;
|
||||
complex temp;
|
||||
long i;
|
||||
long j;
|
||||
|
||||
if (source_box->type == CHILDLESS) {
|
||||
while (source_box->interaction_synch != 1) {
|
||||
/* wait */;
|
||||
}
|
||||
}
|
||||
else {
|
||||
while (source_box->interaction_synch != source_box->num_children) {
|
||||
/* wait */;
|
||||
}
|
||||
}
|
||||
|
||||
source_pos.r = source_box->x_center;
|
||||
source_pos.i = source_box->y_center;
|
||||
dest_pos.r = dest_box->x_center;
|
||||
dest_pos.i = dest_box->y_center;
|
||||
COMPLEX_SUB(z0, source_pos, dest_pos);
|
||||
COMPLEX_DIV(z0_inv, One, z0);
|
||||
z0_pow_minus_n[0].r = One.r;
|
||||
z0_pow_minus_n[0].i = One.i;
|
||||
temp_exp[0].r = source_box->mp_expansion[0].r;
|
||||
temp_exp[0].i = source_box->mp_expansion[0].i;
|
||||
for (i = 1; i < Expansion_Terms; i++) {
|
||||
COMPLEX_MUL(z0_pow_minus_n[i], z0_pow_minus_n[i - 1], z0_inv);
|
||||
COMPLEX_MUL(temp_exp[i], z0_pow_minus_n[i], source_box->mp_expansion[i]);
|
||||
}
|
||||
for (i = 0; i < Expansion_Terms; i++) {
|
||||
result_exp.r = (real) 0.0;
|
||||
result_exp.i = (real) 0.0;
|
||||
for (j = 1; j < Expansion_Terms; j++) {
|
||||
temp.r = C[i + j - 1][j - 1];
|
||||
temp.i = (real) 0.0;
|
||||
COMPLEX_MUL(temp, temp, temp_exp[j]);
|
||||
if ((j & 0x1) == 0x0) {
|
||||
COMPLEX_ADD(result_exp, result_exp, temp);
|
||||
}
|
||||
else {
|
||||
COMPLEX_SUB(result_exp, result_exp, temp);
|
||||
}
|
||||
}
|
||||
COMPLEX_MUL(result_exp, result_exp, z0_pow_minus_n[i]);
|
||||
if (i == 0) {
|
||||
temp.r = log(COMPLEX_ABS(z0));
|
||||
temp.i = (real) 0.0;
|
||||
COMPLEX_MUL(temp, temp, source_box->mp_expansion[0]);
|
||||
COMPLEX_ADD(result_exp, result_exp, temp);
|
||||
}
|
||||
else {
|
||||
temp.r = Inv[i];
|
||||
temp.i = (real) 0.0;
|
||||
COMPLEX_MUL(temp, temp, z0_pow_minus_n[i]);
|
||||
COMPLEX_MUL(temp, temp, source_box->mp_expansion[0]);
|
||||
COMPLEX_SUB(result_exp, result_exp, temp);
|
||||
}
|
||||
COMPLEX_ADD((dest_box->local_expansion[i]),
|
||||
(dest_box->local_expansion[i]), result_exp);
|
||||
}
|
||||
dest_box->cost += V_LIST_COST(Expansion_Terms);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
WAndXListInteractions (long my_id, box *b1, box *b2)
|
||||
{
|
||||
WListInteraction(b1, b2);
|
||||
XListInteraction(b2, b1);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
WListInteraction (box *source_box, box *dest_box)
|
||||
{
|
||||
complex z0;
|
||||
complex z0_inv;
|
||||
complex result;
|
||||
complex source_pos;
|
||||
complex particle_pos;
|
||||
long i;
|
||||
long j;
|
||||
|
||||
if (source_box->type == CHILDLESS) {
|
||||
while (source_box->interaction_synch != 1) {
|
||||
/* wait */;
|
||||
}
|
||||
}
|
||||
else {
|
||||
while (source_box->interaction_synch != source_box->num_children) {
|
||||
/* wait */;
|
||||
}
|
||||
}
|
||||
|
||||
source_pos.r = source_box->x_center;
|
||||
source_pos.i = source_box->y_center;
|
||||
for (i = 0; i < dest_box->num_particles; i++) {
|
||||
result.r = (real) 0.0;
|
||||
result.i = (real) 0.0;
|
||||
particle_pos.r = dest_box->particles[i]->pos.x;
|
||||
particle_pos.i = dest_box->particles[i]->pos.y;
|
||||
COMPLEX_SUB(z0, particle_pos, source_pos);
|
||||
COMPLEX_DIV(z0_inv, One, z0);
|
||||
for (j = Expansion_Terms - 1; j > 0; j--) {
|
||||
COMPLEX_ADD(result, result, (source_box->mp_expansion[j]));
|
||||
COMPLEX_MUL(result, result, z0_inv);
|
||||
}
|
||||
COMPLEX_ADD((dest_box->particles[i]->field),
|
||||
(dest_box->particles[i]->field), result);
|
||||
}
|
||||
|
||||
dest_box->cost += W_LIST_COST(dest_box->num_particles, Expansion_Terms);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
XListInteraction (box *source_box, box *dest_box)
|
||||
{
|
||||
complex z0;
|
||||
complex z0_inv;
|
||||
complex z0_pow_minus_n;
|
||||
complex result_exp[MAX_EXPANSION_TERMS];
|
||||
complex source_pos;
|
||||
complex dest_pos;
|
||||
complex charge;
|
||||
complex temp;
|
||||
long i;
|
||||
long j;
|
||||
|
||||
dest_pos.r = dest_box->x_center;
|
||||
dest_pos.i = dest_box->y_center;
|
||||
for (i = 0; i < Expansion_Terms; i++) {
|
||||
result_exp[i].r = (real) 0.0;
|
||||
result_exp[i].i = (real) 0.0;
|
||||
}
|
||||
for (i = 0; i < source_box->num_particles; i++) {
|
||||
source_pos.r = source_box->particles[i]->pos.x;
|
||||
source_pos.i = source_box->particles[i]->pos.y;
|
||||
charge.r = source_box->particles[i]->charge;
|
||||
charge.i = (real) 0.0;
|
||||
COMPLEX_SUB(z0, source_pos, dest_pos);
|
||||
COMPLEX_DIV(z0_inv, One, z0);
|
||||
z0_pow_minus_n.r = z0_inv.r;
|
||||
z0_pow_minus_n.i = z0_inv.i;
|
||||
for (j = 1; j < Expansion_Terms; j++) {
|
||||
COMPLEX_MUL(z0_pow_minus_n, z0_pow_minus_n, z0_inv);
|
||||
COMPLEX_MUL(temp, charge, z0_pow_minus_n);
|
||||
COMPLEX_ADD(result_exp[j], result_exp[j], temp);
|
||||
}
|
||||
}
|
||||
ALOCK(G_Memory->lock_array, dest_box->exp_lock_index);
|
||||
for (i = 0; i < Expansion_Terms; i++) {
|
||||
COMPLEX_SUB((dest_box->x_expansion[i]),
|
||||
(dest_box->x_expansion[i]), result_exp[i]);
|
||||
}
|
||||
AULOCK(G_Memory->lock_array, dest_box->exp_lock_index);
|
||||
source_box->cost += X_LIST_COST(source_box->num_particles, Expansion_Terms);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ComputeSelfInteraction (box *b)
|
||||
{
|
||||
complex results[MAX_PARTICLES_PER_BOX];
|
||||
complex temp_vector;
|
||||
complex temp_charge;
|
||||
complex temp_result;
|
||||
real denom;
|
||||
real x_sep;
|
||||
real y_sep;
|
||||
long i;
|
||||
long j;
|
||||
|
||||
for (i = 0; i < b->num_particles; i++) {
|
||||
results[i].r = (real) 0.0;
|
||||
results[i].i = (real) 0.0;
|
||||
}
|
||||
|
||||
for (i = 0; i < b->num_particles; i++) {
|
||||
for (j = i + 1; j < b->num_particles; j++) {
|
||||
x_sep = b->particles[i]->pos.x - b->particles[j]->pos.x;
|
||||
y_sep = b->particles[i]->pos.y - b->particles[j]->pos.y;
|
||||
|
||||
if ((fabs(x_sep) < Softening_Param)
|
||||
&& (fabs(y_sep) < Softening_Param)) {
|
||||
if (x_sep >= 0.0)
|
||||
x_sep = Softening_Param;
|
||||
else
|
||||
x_sep = -Softening_Param;
|
||||
if (y_sep >= 0.0)
|
||||
y_sep = Softening_Param;
|
||||
else
|
||||
y_sep = -Softening_Param;
|
||||
}
|
||||
denom = ((real) 1.0) / ((x_sep * x_sep) + (y_sep * y_sep));
|
||||
temp_vector.r = x_sep * denom;
|
||||
temp_vector.i = y_sep * denom;
|
||||
|
||||
temp_charge.r = b->particles[j]->charge;
|
||||
temp_charge.i = (real) 0.0;
|
||||
COMPLEX_MUL(temp_result, temp_vector, temp_charge);
|
||||
COMPLEX_ADD(results[i], results[i], temp_result);
|
||||
|
||||
temp_charge.r = b->particles[i]->charge;
|
||||
temp_charge.i = (real) 0.0;
|
||||
COMPLEX_MUL(temp_result, temp_vector, temp_charge);
|
||||
COMPLEX_SUB(results[j], results[j], temp_result);
|
||||
}
|
||||
results[i].i = -results[i].i;
|
||||
COMPLEX_ADD((b->particles[i]->field),
|
||||
(b->particles[i]->field), results[i]);
|
||||
}
|
||||
|
||||
b->cost += SELF_COST(b->num_particles);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ShiftLocalExp (box *pb, box *cb)
|
||||
{
|
||||
complex z0;
|
||||
complex z0_inv;
|
||||
complex z0_pow_n;
|
||||
complex z0_pow_minus_n;
|
||||
complex temp_exp[MAX_EXPANSION_TERMS];
|
||||
complex result_exp[MAX_EXPANSION_TERMS];
|
||||
complex child_pos;
|
||||
complex parent_pos;
|
||||
complex temp;
|
||||
long i;
|
||||
long j;
|
||||
|
||||
child_pos.r = cb->x_center;
|
||||
child_pos.i = cb->y_center;
|
||||
parent_pos.r = pb->x_center;
|
||||
parent_pos.i = pb->y_center;
|
||||
COMPLEX_SUB(z0, child_pos, parent_pos);
|
||||
COMPLEX_DIV(z0_inv, One, z0);
|
||||
z0_pow_n.r = One.r;
|
||||
z0_pow_n.i = One.i;
|
||||
z0_pow_minus_n.r = One.r;
|
||||
z0_pow_minus_n.i = One.i;
|
||||
for (i = 0; i < Expansion_Terms; i++) {
|
||||
COMPLEX_ADD(pb->local_expansion[i], pb->local_expansion[i],
|
||||
pb->x_expansion[i]);
|
||||
COMPLEX_MUL(temp_exp[i], z0_pow_n, pb->local_expansion[i]);
|
||||
COMPLEX_MUL(z0_pow_n, z0_pow_n, z0);
|
||||
}
|
||||
for (i = 0; i < Expansion_Terms; i++) {
|
||||
result_exp[i].r = (real) 0.0;
|
||||
result_exp[i].i = (real) 0.0;
|
||||
for (j = i; j < Expansion_Terms ; j++) {
|
||||
temp.r = C[j][i];
|
||||
temp.i = (real) 0.0;
|
||||
COMPLEX_MUL(temp, temp, temp_exp[j]);
|
||||
COMPLEX_ADD(result_exp[i], result_exp[i], temp);
|
||||
}
|
||||
COMPLEX_MUL(result_exp[i], temp, z0_pow_minus_n);
|
||||
COMPLEX_MUL(z0_pow_minus_n, z0_pow_minus_n, z0_inv);
|
||||
}
|
||||
ALOCK(G_Memory->lock_array, cb->exp_lock_index);
|
||||
for (i = 0; i < Expansion_Terms; i++) {
|
||||
COMPLEX_ADD((cb->local_expansion[i]), (cb->local_expansion[i]),
|
||||
result_exp[i]);
|
||||
}
|
||||
AULOCK(G_Memory->lock_array, cb->exp_lock_index);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
EvaluateLocalExp (box *b)
|
||||
{
|
||||
complex z0;
|
||||
complex result;
|
||||
complex source_pos;
|
||||
complex particle_pos;
|
||||
complex temp;
|
||||
long i;
|
||||
long j;
|
||||
|
||||
source_pos.r = b->x_center;
|
||||
source_pos.i = b->y_center;
|
||||
for (i = 0; i < b->num_particles; i++) {
|
||||
result.r = (real) 0.0;
|
||||
result.i = (real) 0.0;
|
||||
particle_pos.r = b->particles[i]->pos.x;
|
||||
particle_pos.i = b->particles[i]->pos.y;
|
||||
COMPLEX_SUB(z0, particle_pos, source_pos);
|
||||
for (j = Expansion_Terms - 1; j > 0; j--) {
|
||||
temp.r = (real) j;
|
||||
temp.i = (real) 0.0;
|
||||
COMPLEX_MUL(result, result, z0);
|
||||
COMPLEX_MUL(temp, temp, (b->local_expansion[j]));
|
||||
COMPLEX_ADD(result, result, temp);
|
||||
}
|
||||
COMPLEX_ADD((b->particles[i]->field), (b->particles[i]->field), result);
|
||||
b->particles[i]->field.r = -(b->particles[i]->field.r);
|
||||
b->particles[i]->field.r = RoundReal(b->particles[i]->field.r);
|
||||
b->particles[i]->field.i = RoundReal(b->particles[i]->field.i);
|
||||
}
|
||||
}
|
||||
|
||||
|
30
splash2/codes/apps/fmm/interactions.H
Normal file
30
splash2/codes/apps/fmm/interactions.H
Normal file
|
@ -0,0 +1,30 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef _Expansions_H
|
||||
#define _Expansions_H 1
|
||||
|
||||
#include "box.h"
|
||||
|
||||
extern void InitExpTables(void);
|
||||
extern void PrintExpTables(void);
|
||||
extern void UpwardPass(long my_id, box *b);
|
||||
extern void ComputeInteractions(long my_id, box *b);
|
||||
extern void DownwardPass(long my_id, box *b);
|
||||
extern void ComputeParticlePositions(long my_id, box *b);
|
||||
|
||||
#endif /* _Expansions_H */
|
||||
|
BIN
splash2/codes/apps/fmm/libpthread.a
Normal file
BIN
splash2/codes/apps/fmm/libpthread.a
Normal file
Binary file not shown.
BIN
splash2/codes/apps/fmm/m5op_x86.o
Normal file
BIN
splash2/codes/apps/fmm/m5op_x86.o
Normal file
Binary file not shown.
60
splash2/codes/apps/fmm/memory.C
Normal file
60
splash2/codes/apps/fmm/memory.C
Normal file
|
@ -0,0 +1,60 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#include <float.h>
|
||||
#include "defs.h"
|
||||
#include "memory.h"
|
||||
|
||||
MAIN_ENV
|
||||
|
||||
g_mem *G_Memory;
|
||||
local_memory Local[MAX_PROCS];
|
||||
|
||||
/*
|
||||
* InitGlobalMemory ()
|
||||
*
|
||||
* Args : none.
|
||||
*
|
||||
* Returns : nothing.
|
||||
*
|
||||
* Side Effects : Allocates all the global storage for G_Memory.
|
||||
*
|
||||
*/
|
||||
void
|
||||
InitGlobalMemory ()
|
||||
{
|
||||
G_Memory = (g_mem *) G_MALLOC(sizeof(g_mem));
|
||||
G_Memory->i_array = (long *) G_MALLOC(Number_Of_Processors * sizeof(long));
|
||||
G_Memory->d_array = (double *) G_MALLOC(Number_Of_Processors * sizeof(double));
|
||||
if (G_Memory == NULL) {
|
||||
printf("Ran out of global memory in InitGlobalMemory\n");
|
||||
exit(-1);
|
||||
}
|
||||
G_Memory->count = 0;
|
||||
G_Memory->id = 0;
|
||||
LOCKINIT(G_Memory->io_lock);
|
||||
LOCKINIT(G_Memory->mal_lock);
|
||||
LOCKINIT(G_Memory->single_lock);
|
||||
LOCKINIT(G_Memory->count_lock);
|
||||
ALOCKINIT(G_Memory->lock_array, MAX_LOCKS);
|
||||
BARINIT(G_Memory->synch, Number_Of_Processors);
|
||||
G_Memory->max_x = -MAX_REAL;
|
||||
G_Memory->min_x = MAX_REAL;
|
||||
G_Memory->max_y = -MAX_REAL;
|
||||
G_Memory->min_y = MAX_REAL;
|
||||
}
|
||||
|
||||
|
87
splash2/codes/apps/fmm/memory.H
Normal file
87
splash2/codes/apps/fmm/memory.H
Normal file
|
@ -0,0 +1,87 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef _Memory_H
|
||||
#define _Memory_H 1
|
||||
|
||||
#include "defs.h"
|
||||
#include "particle.h"
|
||||
#include "box.h"
|
||||
|
||||
#define MAX_LOCKS 2048
|
||||
|
||||
EXTERN_ENV
|
||||
|
||||
typedef struct _G_Mem g_mem;
|
||||
|
||||
struct _G_Mem
|
||||
{
|
||||
LOCKDEC(io_lock)
|
||||
LOCKDEC(mal_lock)
|
||||
LOCKDEC(single_lock)
|
||||
LOCKDEC(count_lock)
|
||||
long count;
|
||||
ALOCKDEC(lock_array, MAX_LOCKS)
|
||||
BARDEC(synch)
|
||||
volatile long *i_array;
|
||||
volatile double *d_array;
|
||||
real f_array[MAX_PROCS][NUM_DIM_POW_2];
|
||||
real max_x;
|
||||
real min_x;
|
||||
real max_y;
|
||||
real min_y;
|
||||
long id;
|
||||
};
|
||||
extern g_mem *G_Memory;
|
||||
|
||||
typedef struct _Local_Memory local_memory;
|
||||
struct _Local_Memory {
|
||||
long pad_begin[PAD_SIZE];
|
||||
|
||||
box *B_Heap;
|
||||
long Index_B_Heap;
|
||||
long Max_B_Heap;
|
||||
|
||||
particle **Particles;
|
||||
long Num_Particles;
|
||||
long Max_Particles;
|
||||
|
||||
box *Childless_Partition;
|
||||
box *Parent_Partition[MAX_LEVEL];
|
||||
long Max_Parent_Level;
|
||||
|
||||
box *Local_Grid;
|
||||
real Local_X_Max;
|
||||
real Local_X_Min;
|
||||
real Local_Y_Max;
|
||||
real Local_Y_Min;
|
||||
|
||||
long Total_Work;
|
||||
long Min_Work;
|
||||
long Max_Work;
|
||||
|
||||
long Time_Step;
|
||||
double Time;
|
||||
unsigned long init_done_times;
|
||||
time_info Timing[MAX_TIME_STEPS];
|
||||
|
||||
long pad_end[PAD_SIZE];
|
||||
};
|
||||
extern local_memory Local[MAX_PROCS];
|
||||
|
||||
extern void InitGlobalMemory(void);
|
||||
|
||||
#endif /* _Memory_H */
|
341
splash2/codes/apps/fmm/particle.C
Normal file
341
splash2/codes/apps/fmm/particle.C
Normal file
|
@ -0,0 +1,341 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include "defs.h"
|
||||
#include "memory.h"
|
||||
#include "particle.h"
|
||||
|
||||
#define ONE_EV ((real) 1.6e-19)
|
||||
#define MAX_FRAC 0.999
|
||||
#define RANDOM_SIZE 256
|
||||
|
||||
#if !defined(M_PI)
|
||||
#define M_PI 3.14159265358979323846
|
||||
#endif
|
||||
|
||||
/* How many particles can fit on one line */
|
||||
#define PARTICLES_PER_LINE 8
|
||||
|
||||
long Total_Particles;
|
||||
|
||||
/* Used to keep track of all the particles. The array is in order of increasing id. */
|
||||
static particle **Particle_List;
|
||||
|
||||
particle *InitParticle(real charge, real mass);
|
||||
void PickShell(vector *v, real radius);
|
||||
real XRand(real low, real high);
|
||||
|
||||
|
||||
void
|
||||
CreateDistribution (cluster_type cluster, model_type model)
|
||||
{
|
||||
particle *particle_array;
|
||||
long global_num_particles;
|
||||
particle *new_particle;
|
||||
// char particle_state[RANDOM_SIZE];
|
||||
real charge;
|
||||
real r_scale;
|
||||
real v_scale;
|
||||
vector r_sum;
|
||||
vector v_sum;
|
||||
long end_limit = 0;
|
||||
long i;
|
||||
real temp_r;
|
||||
real radius = 0.0;
|
||||
real x_vel;
|
||||
real y_vel;
|
||||
real vel;
|
||||
real offset = 0.0;
|
||||
particle *twin_particle;
|
||||
|
||||
particle_array = (particle *) G_MALLOC(Total_Particles * sizeof(particle));
|
||||
|
||||
Particle_List = (particle **) G_MALLOC(Total_Particles * sizeof(particle *));
|
||||
for (i = 0; i < Total_Particles; i++)
|
||||
Particle_List[i] = &particle_array[i];
|
||||
|
||||
r_scale = 3 * M_PI / 16;
|
||||
v_scale = (real) sqrt(1.0 / (double) r_scale);
|
||||
r_sum.x = (real) 0.0;
|
||||
r_sum.y = (real) 0.0;
|
||||
v_sum.x = (real) 0.0;
|
||||
v_sum.y = (real) 0.0;
|
||||
// initstate(0, particle_state, RANDOM_SIZE);
|
||||
|
||||
switch (cluster) {
|
||||
case ONE_CLUSTER:
|
||||
end_limit = Total_Particles;
|
||||
switch (model) {
|
||||
case UNIFORM:
|
||||
printf("Creating a one cluster, uniform distribution for %ld ", Total_Particles);
|
||||
printf("particles\n");
|
||||
break;
|
||||
case PLUMMER:
|
||||
printf("Creating a one cluster, non uniform distribution for %ld ", Total_Particles);
|
||||
printf("particles\n");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case TWO_CLUSTER:
|
||||
end_limit = (Total_Particles / 2) + (Total_Particles & 0x1);
|
||||
switch (model) {
|
||||
case UNIFORM:
|
||||
printf("Creating a two cluster, uniform distribution for %ld ", Total_Particles);
|
||||
printf("particles\n");
|
||||
break;
|
||||
case PLUMMER:
|
||||
printf("Creating a two cluster, non uniform distribution for %ld ", Total_Particles);
|
||||
printf("particles\n");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
// setstate(particle_state);
|
||||
global_num_particles = 0;
|
||||
charge = 1.0 / Total_Particles;
|
||||
charge /= Total_Particles;
|
||||
for (i = 0; i < end_limit; i++) {
|
||||
new_particle = InitParticle(charge, charge);
|
||||
switch (model) {
|
||||
case UNIFORM:
|
||||
do {
|
||||
new_particle->pos.x = XRand(-1.0, 1.0);
|
||||
new_particle->pos.y = XRand(-1.0, 1.0);
|
||||
temp_r = DOT_PRODUCT((new_particle->pos), (new_particle->pos));
|
||||
}
|
||||
while (temp_r > (real) 1.0);
|
||||
radius = sqrt(temp_r);
|
||||
break;
|
||||
case PLUMMER:
|
||||
do
|
||||
radius = (real) 1.0 / (real) sqrt(pow(XRand(0.0, MAX_FRAC),
|
||||
-2.0/3.0) - 1);
|
||||
while (radius > 9.0);
|
||||
PickShell(&(new_particle->pos), r_scale * radius);
|
||||
break;
|
||||
}
|
||||
VECTOR_ADD(r_sum, r_sum, (new_particle->pos));
|
||||
|
||||
do {
|
||||
x_vel = XRand(0.0, 1.0);
|
||||
y_vel = XRand(0.0, 0.1);
|
||||
}
|
||||
while (y_vel > x_vel * x_vel * (real) pow(1.0 - (x_vel * x_vel), 3.5));
|
||||
vel = (real) sqrt(2.0) * x_vel / pow(1.0 + (radius * radius), 0.25);
|
||||
PickShell(&(new_particle->vel), v_scale * vel);
|
||||
VECTOR_ADD(v_sum, v_sum, (new_particle->vel));
|
||||
}
|
||||
|
||||
if (cluster == TWO_CLUSTER) {
|
||||
switch (model) {
|
||||
case UNIFORM:
|
||||
offset = 1.5;
|
||||
break;
|
||||
case PLUMMER:
|
||||
offset = 2.0;
|
||||
break;
|
||||
}
|
||||
for (i = end_limit; i < Total_Particles; i++) {
|
||||
new_particle = InitParticle(charge, charge);
|
||||
twin_particle = Particle_List[i - end_limit];
|
||||
new_particle->pos.x = twin_particle->pos.x + offset;
|
||||
new_particle->pos.y = twin_particle->pos.y + offset;
|
||||
VECTOR_ADD(r_sum, r_sum, (new_particle->pos));
|
||||
new_particle->vel.x = twin_particle->vel.x;
|
||||
new_particle->vel.y = twin_particle->vel.y;
|
||||
VECTOR_ADD(v_sum, v_sum, (new_particle->vel));
|
||||
}
|
||||
}
|
||||
|
||||
VECTOR_DIV(r_sum, r_sum, (real) Total_Particles);
|
||||
VECTOR_DIV(v_sum, v_sum, (real) Total_Particles);
|
||||
for (i = 0; i < Total_Particles; i++) {
|
||||
new_particle = Particle_List[i];
|
||||
VECTOR_SUB((new_particle->pos), (new_particle->pos), r_sum);
|
||||
VECTOR_SUB((new_particle->vel), (new_particle->vel), v_sum);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
CreateParticleList (long my_id, long length)
|
||||
{
|
||||
LOCK(G_Memory->mal_lock);
|
||||
Local[my_id].Particles = (particle **) G_MALLOC(length
|
||||
* sizeof(particle *));
|
||||
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might distribute the
|
||||
Particles data across physically distributed memories as desired.
|
||||
|
||||
One way to do this is as follows:
|
||||
|
||||
char *starting_address;
|
||||
char *ending_address;
|
||||
|
||||
starting_address = (char *) Local[my_id].Particles;
|
||||
ending_address = (((char *) Local[my_id].Particles)
|
||||
+ (length * sizeof(particle *)) - 1);
|
||||
|
||||
Place all addresses x such that (starting_address <= x < ending_address)
|
||||
on node my_id
|
||||
|
||||
*/
|
||||
|
||||
UNLOCK(G_Memory->mal_lock);
|
||||
Local[my_id].Max_Particles = length;
|
||||
Local[my_id].Num_Particles = 0;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
InitParticleList (long my_id, long num_assigned, long starting_id)
|
||||
{
|
||||
long i;
|
||||
|
||||
for (i = 0; i < num_assigned; i++)
|
||||
Local[my_id].Particles[i] = Particle_List[i + starting_id];
|
||||
Local[my_id].Num_Particles = num_assigned;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PrintParticle (particle *p)
|
||||
*
|
||||
* Args : the address of a particle, p.
|
||||
*
|
||||
* Returns : nothing.
|
||||
*
|
||||
* Side Effects : Prints to stdout the information stored for p.
|
||||
*
|
||||
*/
|
||||
void
|
||||
PrintParticle (particle *p)
|
||||
{
|
||||
if (p != NULL) {
|
||||
printf("P %6ld :", p->id);
|
||||
printf(" Pos = ");
|
||||
PrintVector(&(p->pos));
|
||||
}
|
||||
else
|
||||
printf("Particle has not been initialized yet.\n");
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
PrintAllParticles ()
|
||||
{
|
||||
long i;
|
||||
|
||||
fflush(stdout);
|
||||
printf(" PARTICLE POSITIONS\n\n");
|
||||
for (i = 0; i < Total_Particles; i++) {
|
||||
PrintParticle(Particle_List[i]);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
PrintParticleArrayIds (particle **p_array, long num_particles)
|
||||
{
|
||||
long tab_count = PARTICLES_PER_LINE;
|
||||
long i = 0;
|
||||
|
||||
if (num_particles == 0)
|
||||
printf("NONE\n");
|
||||
else {
|
||||
for (i = 0; i < num_particles; i++) {
|
||||
if (tab_count == 0) {
|
||||
tab_count = PARTICLES_PER_LINE;
|
||||
printf("\n");
|
||||
}
|
||||
printf("\tP%ld", p_array[i]->id);
|
||||
tab_count -= 1;
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* InitParticle (long my_id, real x_pos, real y_pos, real charge)
|
||||
*
|
||||
* Args : the x_pos, y_pos, and charge (in eV) of the particle.
|
||||
*
|
||||
* Returns : the address of the newly created particle.
|
||||
*
|
||||
* Side Effects : Initializes field to 0, and sets the particle ID to a
|
||||
* unique number. Also converts charge to coulombs from eV.
|
||||
*
|
||||
*/
|
||||
particle *
|
||||
InitParticle (real charge, real mass)
|
||||
{
|
||||
particle *p;
|
||||
static long particle_id = 0;
|
||||
|
||||
p = Particle_List[particle_id];
|
||||
p->id = particle_id++;
|
||||
p->charge = charge;
|
||||
p->mass = mass;
|
||||
p->pos.x = (real) 0.0;
|
||||
p->pos.y = (real) 0.0;
|
||||
p->vel.x = (real) 0.0;
|
||||
p->vel.y = (real) 0.0;
|
||||
p->acc.x = (real) 0.0;
|
||||
p->acc.y = (real) 0.0;
|
||||
p->field.r = (real) 0.0;
|
||||
p->field.i = (real) 0.0;
|
||||
p->cost = 1;
|
||||
p->box = 0.0;
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
PickShell (vector *v, real radius)
|
||||
{
|
||||
real temp_r;
|
||||
real r_scale;
|
||||
|
||||
do {
|
||||
v->x = XRand(-1.0, 1.0);
|
||||
v->y = XRand(-1.0, 1.0);
|
||||
temp_r = DOT_PRODUCT((*v), (*v));
|
||||
}
|
||||
while (temp_r >1.0);
|
||||
r_scale = radius / (real) sqrt(temp_r);
|
||||
VECTOR_MUL((*v), (*v), r_scale);
|
||||
}
|
||||
|
||||
|
||||
real
|
||||
XRand (real low, real high)
|
||||
{
|
||||
real ret_val;
|
||||
|
||||
ret_val = low + (high - low) * ((real) rand/*om*/() / 2147483647.0);
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
|
||||
#undef PARTICLES_PER_LINE
|
||||
#undef MAX_FRAC
|
||||
#undef RANDOM_SIZE
|
||||
#undef ONE_EV
|
67
splash2/codes/apps/fmm/particle.H
Normal file
67
splash2/codes/apps/fmm/particle.H
Normal file
|
@ -0,0 +1,67 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef _Particle_H
|
||||
#define _Particle_H 1
|
||||
|
||||
#include "defs.h"
|
||||
|
||||
typedef struct _Particle particle;
|
||||
typedef struct _Particle_Node particle_node;
|
||||
|
||||
typedef enum { ONE_CLUSTER, TWO_CLUSTER } cluster_type;
|
||||
typedef enum { UNIFORM, PLUMMER } model_type;
|
||||
|
||||
/* Every particle has :
|
||||
* 1. A unique ID number
|
||||
* 2. An x and y position
|
||||
* 3. A charge
|
||||
* 4. The field that acts on it due to every other particle
|
||||
*
|
||||
* The force is what the algorithm actually calculates.
|
||||
*/
|
||||
struct _Particle
|
||||
{
|
||||
long id;
|
||||
real charge;
|
||||
real mass;
|
||||
vector pos;
|
||||
vector acc;
|
||||
vector vel;
|
||||
complex field;
|
||||
long cost;
|
||||
real box;
|
||||
};
|
||||
|
||||
|
||||
/* This structure is used for linked lists of particles */
|
||||
struct _Particle_Node
|
||||
{
|
||||
particle *data;
|
||||
particle_node *next;
|
||||
};
|
||||
|
||||
extern long Total_Particles;
|
||||
|
||||
extern void CreateParticleList(long my_id, long length);
|
||||
extern void InitParticleList(long my_id, long num_assigned, long starting_id);
|
||||
extern void CreateDistribution(cluster_type cluster, model_type model);
|
||||
extern void PrintParticle(particle *p);
|
||||
extern void PrintAllParticles(void);
|
||||
extern void PrintParticleArrayIds(particle **p_array, long num_particles);
|
||||
|
||||
|
||||
#endif /* _Particle_H */
|
373
splash2/codes/apps/fmm/partition_grid.C
Normal file
373
splash2/codes/apps/fmm/partition_grid.C
Normal file
|
@ -0,0 +1,373 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
#include "defs.h"
|
||||
#include "memory.h"
|
||||
#include "particle.h"
|
||||
#include "box.h"
|
||||
#include "partition_grid.h"
|
||||
|
||||
/* Cost scaling divisor : 1 for x up to 20, otherwise (x - 20) * 50.
   Every use of the argument is parenthesized so that an argument that
   is itself an expression is grouped as a whole. */
#define DIVISOR(x) (((x) <= 20) ? 1 : (((x) - 20) * 50))
|
||||
|
||||
/* Pair of longs named id and num (presumably an id together with a
   count; no use of this type appears in this file). */
typedef struct _Id_Info
{
   long id;
   long num;
} id_info;
|
||||
|
||||
/* Histogram entry used by ComputeCostOfBox : a cost value and the
   number of times it has been counted. */
typedef struct _Cost_Info
{
   long cost;
   long num;
} cost_info;
|
||||
|
||||
long CheckBox(long my_id, box *b, long partition_level);
|
||||
|
||||
void
|
||||
InitPartition (long my_id)
|
||||
{
|
||||
long i;
|
||||
|
||||
Local[my_id].Childless_Partition = NULL;
|
||||
for (i = 0; i < MAX_LEVEL; i++) {
|
||||
Local[my_id].Parent_Partition[i] = NULL;
|
||||
}
|
||||
Local[my_id].Max_Parent_Level = -1;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
PartitionIterate (long my_id, partition_function function,
|
||||
partition_start position)
|
||||
{
|
||||
box *b;
|
||||
long i;
|
||||
|
||||
if (position == CHILDREN) {
|
||||
b = Local[my_id].Childless_Partition;
|
||||
while (b != NULL) {
|
||||
(*function)(my_id, b);
|
||||
b = b->next;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (position == TOP) {
|
||||
for (i = 0; i <= Local[my_id].Max_Parent_Level; i++) {
|
||||
b = Local[my_id].Parent_Partition[i];
|
||||
while (b != NULL) {
|
||||
(*function)(my_id, b);
|
||||
b = b->next;
|
||||
}
|
||||
}
|
||||
b = Local[my_id].Childless_Partition;
|
||||
while (b != NULL) {
|
||||
(*function)(my_id, b);
|
||||
b = b->next;
|
||||
}
|
||||
}
|
||||
else {
|
||||
b = Local[my_id].Childless_Partition;
|
||||
while (b != NULL) {
|
||||
(*function)(my_id, b);
|
||||
b = b->next;
|
||||
}
|
||||
for (i = Local[my_id].Max_Parent_Level; i >= 0; i--) {
|
||||
b = Local[my_id].Parent_Partition[i];
|
||||
while (b != NULL) {
|
||||
(*function)(my_id, b);
|
||||
b = b->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
InsertBoxInPartition (long my_id, box *b)
|
||||
{
|
||||
box *level_list;
|
||||
|
||||
if (b->type == CHILDLESS) {
|
||||
b->prev = NULL;
|
||||
if (Local[my_id].Childless_Partition != NULL)
|
||||
Local[my_id].Childless_Partition->prev = b;
|
||||
b->next = Local[my_id].Childless_Partition;
|
||||
Local[my_id].Childless_Partition = b;
|
||||
}
|
||||
else {
|
||||
level_list = Local[my_id].Parent_Partition[b->level];
|
||||
b->prev = NULL;
|
||||
if (level_list != NULL)
|
||||
level_list->prev = b;
|
||||
b->next = level_list;
|
||||
Local[my_id].Parent_Partition[b->level] = b;
|
||||
if (b->level > Local[my_id].Max_Parent_Level) {
|
||||
Local[my_id].Max_Parent_Level = b->level;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
RemoveBoxFromPartition (long my_id, box *b)
|
||||
{
|
||||
if (b->type == CHILDLESS) {
|
||||
if (b->prev != NULL)
|
||||
b->prev->next = b->next;
|
||||
else
|
||||
Local[my_id].Childless_Partition = b->next;
|
||||
if (b->next != NULL)
|
||||
b->next->prev = b->prev;
|
||||
}
|
||||
else {
|
||||
if (b->prev != NULL)
|
||||
b->prev->next = b->next;
|
||||
else
|
||||
Local[my_id].Parent_Partition[b->level] = b->next;
|
||||
if (b->next != NULL)
|
||||
b->next->prev = b->prev;
|
||||
if ((b->level == Local[my_id].Max_Parent_Level) &&
|
||||
(Local[my_id].Parent_Partition[b->level] == NULL)) {
|
||||
while (Local[my_id].Parent_Partition[Local[my_id].Max_Parent_Level]
|
||||
== NULL)
|
||||
Local[my_id].Max_Parent_Level -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ComputeCostOfBox (box *b)
|
||||
{
|
||||
long different_costs;
|
||||
long i;
|
||||
long j;
|
||||
long new_cost;
|
||||
cost_info cost_list[MAX_PARTICLES_PER_BOX];
|
||||
cost_info winner;
|
||||
long winner_index;
|
||||
long cost_index[MAX_PARTICLES_PER_BOX];
|
||||
|
||||
if (b->type == PARENT)
|
||||
b->cost = ((b->num_v_list * V_LIST_COST(Expansion_Terms))
|
||||
/ DIVISOR(Expansion_Terms)) + 1;
|
||||
else {
|
||||
different_costs = 0;
|
||||
for (i = 0; i < b->num_particles; i++) {
|
||||
new_cost = b->particles[i]->cost;
|
||||
for (j = 0; j < different_costs; j++) {
|
||||
if (new_cost == cost_list[j].cost)
|
||||
break;
|
||||
}
|
||||
if (j == different_costs) {
|
||||
cost_list[different_costs].cost = new_cost;
|
||||
cost_list[different_costs].num = 1;
|
||||
different_costs += 1;
|
||||
}
|
||||
else
|
||||
cost_list[j].num += 1;
|
||||
}
|
||||
|
||||
winner.cost = cost_list[0].cost;
|
||||
winner.num = 1;
|
||||
winner_index = 0;
|
||||
cost_index[0] = 0;
|
||||
for (i = 1; i < different_costs; i++) {
|
||||
if (cost_list[i].num > cost_list[winner_index].num) {
|
||||
winner.cost = cost_list[i].cost;
|
||||
winner.num = 1;
|
||||
winner_index = i;
|
||||
cost_index[0] = i;
|
||||
}
|
||||
else {
|
||||
if (cost_list[i].num == cost_list[winner_index].num) {
|
||||
cost_index[winner.num] = i;
|
||||
winner.num += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (winner.num != 1) {
|
||||
for (i = 1; i < winner.num; i++)
|
||||
winner.cost += cost_list[cost_index[i]].cost;
|
||||
winner.cost /= winner.num;
|
||||
}
|
||||
b->cost = (winner.cost * b->num_particles) / DIVISOR(Expansion_Terms);
|
||||
if (b->cost == 0)
|
||||
b->cost = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
CheckPartition (long my_id)
|
||||
{
|
||||
long i;
|
||||
box *b;
|
||||
long NE, NoP, CB, PB;
|
||||
long Q1, Q2, Q3, Q4;
|
||||
long PC, CC;
|
||||
real xpos, ypos;
|
||||
|
||||
NE = NoP = CB = PB = Q1 = Q2 = Q3 = Q4 = PC = CC = 0;
|
||||
for (i = 0; i <= Local[my_id].Max_Parent_Level; i++) {
|
||||
b = Local[my_id].Parent_Partition[i];
|
||||
while (b != NULL) {
|
||||
NE += CheckBox(my_id, b, i);
|
||||
PB += 1;
|
||||
PC += b->cost;
|
||||
b = b->next;
|
||||
}
|
||||
}
|
||||
b = Local[my_id].Childless_Partition;
|
||||
while (b != NULL) {
|
||||
NE += CheckBox(my_id, b, -1);
|
||||
for (i = 0; i < b->num_particles; i++) {
|
||||
xpos = b->particles[i]->pos.x;
|
||||
ypos = b->particles[i]->pos.y;
|
||||
if (xpos > Grid->x_center) {
|
||||
if (ypos > Grid->y_center)
|
||||
Q1 += 1;
|
||||
else
|
||||
Q4 += 1;
|
||||
}
|
||||
else {
|
||||
if (ypos > Grid->y_center)
|
||||
Q2 += 1;
|
||||
else
|
||||
Q3 += 1;
|
||||
}
|
||||
}
|
||||
NoP += b->num_particles;
|
||||
CB += 1;
|
||||
CC += b->cost;
|
||||
b = b->next;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
long
|
||||
CheckBox (long my_id, box *b, long partition_level)
|
||||
{
|
||||
long num_errors;
|
||||
|
||||
num_errors = 0;
|
||||
if (b->type == CHILDLESS) {
|
||||
if (partition_level != -1) {
|
||||
LOCK(G_Memory->io_lock);
|
||||
printf("ERROR : CHILDLESS box in parent partition (B%f P%ld %ld)\n", b->id, my_id, b->proc);
|
||||
fflush(stdout);
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
num_errors += 1;
|
||||
}
|
||||
if (b->num_children != 0) {
|
||||
LOCK(G_Memory->io_lock);
|
||||
printf("ERROR : CHILDLESS box has children (B%f P%ld)\n", b->id, my_id);
|
||||
fflush(stdout);
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
num_errors += 1;
|
||||
}
|
||||
if (b->num_particles == 0) {
|
||||
LOCK(G_Memory->io_lock);
|
||||
printf("ERROR : CHILDLESS box has no particles (B%f P%ld)\n", b->id, my_id);
|
||||
fflush(stdout);
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
num_errors += 1;
|
||||
}
|
||||
if (b->particles[b->num_particles - 1] == NULL) {
|
||||
LOCK(G_Memory->io_lock);
|
||||
printf("ERROR : CHILDLESS box has fewer particles than expected ");
|
||||
printf("(B%f P%ld)\n", b->id, my_id);
|
||||
fflush(stdout);
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
num_errors += 1;
|
||||
}
|
||||
if (b->particles[b->num_particles] != NULL) {
|
||||
LOCK(G_Memory->io_lock);
|
||||
printf("ERROR : CHILDLESS box has more particles than expected ");
|
||||
printf("(B%f P%ld)\n", b->id, my_id);
|
||||
fflush(stdout);
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
num_errors += 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (partition_level == -1) {
|
||||
LOCK(G_Memory->io_lock);
|
||||
printf("ERROR : PARENT box in childless partition (B%f P%ld %ld)\n",
|
||||
b->id, my_id, b->proc);
|
||||
fflush(stdout);
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
num_errors += 1;
|
||||
}
|
||||
else {
|
||||
if (partition_level != b->level) {
|
||||
LOCK(G_Memory->io_lock);
|
||||
printf("ERROR : PARENT box in wrong partition level ");
|
||||
printf("(%ld vs %ld) (B%f P%ld)\n", b->level, partition_level, b->id, my_id);
|
||||
fflush(stdout);
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
num_errors += 1;
|
||||
}
|
||||
}
|
||||
if (b->num_children == 0) {
|
||||
LOCK(G_Memory->io_lock);
|
||||
printf("ERROR : PARENT box has no children (B%f P%ld)\n", b->id, my_id);
|
||||
fflush(stdout);
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
num_errors += 1;
|
||||
}
|
||||
if (b->num_particles != 0) {
|
||||
LOCK(G_Memory->io_lock);
|
||||
printf("ERROR : PARENT box has particles (B%f P%ld)\n", b->id, my_id);
|
||||
fflush(stdout);
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
num_errors += 1;
|
||||
}
|
||||
}
|
||||
if (b->parent == NULL) {
|
||||
if (b != Grid) {
|
||||
LOCK(G_Memory->io_lock);
|
||||
if (b->type == CHILDLESS)
|
||||
printf("ERROR : Extra CHILDLESS box in partition (B%f P%ld)\n", b->id, my_id);
|
||||
else
|
||||
printf("ERROR : Extra PARENT box in partition (B%f P%ld)\n", b->id, my_id);
|
||||
fflush(stdout);
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
num_errors += 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (b->parent->children[b->child_num] != b) {
|
||||
LOCK(G_Memory->io_lock);
|
||||
if (b->type == CHILDLESS)
|
||||
printf("ERROR : Extra CHILDLESS box in partition (B%f P%ld)\n", b->id, my_id);
|
||||
else
|
||||
printf("ERROR : Extra PARENT box in partition (B%f P%ld)\n", b->id, my_id);
|
||||
fflush(stdout);
|
||||
UNLOCK(G_Memory->io_lock);
|
||||
num_errors += 1;
|
||||
}
|
||||
}
|
||||
return num_errors;
|
||||
}
|
||||
|
||||
|
||||
#undef DIVISOR
|
37
splash2/codes/apps/fmm/partition_grid.H
Normal file
37
splash2/codes/apps/fmm/partition_grid.H
Normal file
|
@ -0,0 +1,37 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#ifndef _Partition_H
|
||||
#define _Partition_H 1
|
||||
|
||||
#include "defs.h"
|
||||
#include "box.h"
|
||||
|
||||
/* Void function type */
|
||||
typedef void (*partition_function)(long my_id, box *b);
|
||||
|
||||
typedef enum { TOP, BOTTOM, CHILDREN } partition_start;
|
||||
typedef enum { ORB, COST_ZONES } partition_alg;
|
||||
|
||||
extern void InitPartition(long my_id);
|
||||
extern void PartitionIterate(long my_id, partition_function function,
|
||||
partition_start position);
|
||||
extern void InsertBoxInPartition(long my_id, box *b);
|
||||
extern void RemoveBoxFromPartition(long my_id, box *b);
|
||||
extern void ComputeCostOfBox(box *b);
|
||||
extern void CheckPartition(long my_id);
|
||||
|
||||
#endif /* _Partition_H */
|
83
splash2/codes/apps/ocean/README.ocean
Normal file
83
splash2/codes/apps/ocean/README.ocean
Normal file
|
@ -0,0 +1,83 @@
|
|||
GENERAL INFORMATION:
|
||||
|
||||
The OCEAN program simulates large-scale ocean movements based on eddy and
|
||||
boundary currents, and is an enhanced version of the SPLASH Ocean code.
|
||||
A description of the functionality of this code can be found in the
|
||||
original SPLASH report. The implementations contained in SPLASH-2
|
||||
differ from the original SPLASH implementation in the following ways:
|
||||
|
||||
(1) The SPLASH-2 implementations are written in C rather than
|
||||
FORTRAN.
|
||||
(2) Grids are partitioned into square-like subgrids rather than
|
||||
groups of columns to improve the communication to computation
|
||||
ratio.
|
||||
(3) The SOR solver in the SPLASH Ocean code has been replaced with a
|
||||
restricted Red-Black Gauss-Seidel Multigrid solver based on that
|
||||
presented in:
|
||||
|
||||
Brandt, A. Multi-Level Adaptive Solutions to Boundary-Value Problems.
|
||||
Mathematics of Computation, 31(138):333-390, April 1977.
|
||||
|
||||
The solver is restricted so that each processor has at least two
|
||||
grid points in each dimension in each grid subpartition.
|
||||
|
||||
Two implementations are provided in the SPLASH-2 distribution:
|
||||
|
||||
(1) Non-contiguous partition allocation
|
||||
|
||||
This implementation (contained in the non_contiguous_partitions
|
||||
subdirectory) implements the grids to be operated on with
|
||||
two-dimensional arrays. This data structure prevents partitions
|
||||
from being allocated contiguously, but leads to a conceptually
|
||||
simple programming implementation.
|
||||
|
||||
(2) Contiguous partition allocation
|
||||
|
||||
This implementation (contained in the contiguous_partitions
|
||||
subdirectory) implements the grids to be operated on with
|
||||
3-dimensional arrays. The first dimension specifies the processor
|
||||
which owns the partition, and the second and third dimensions
|
||||
specify the x and y offset within a partition. This data structure
|
||||
allows partitions to be allocated contiguously and entirely in the
|
||||
local memory of processors that "own" them, thus enhancing data
|
||||
locality properties.
|
||||
|
||||
The contiguous partition allocation implementation is described in:
|
||||
|
||||
Woo, S. C., Singh, J. P., and Hennessy, J. L. The Performance Advantages
|
||||
of Integrating Message Passing in Cache-Coherent Multiprocessors.
|
||||
Technical Report CSL-TR-93-593, Stanford University, December 1993.
|
||||
|
||||
A detailed description of both versions will appear in the SPLASH-2 report.
|
||||
The non-contiguous partition allocation implementation is conceptually
|
||||
similar, except for the use of statically allocated 2-dimensional arrays.
|
||||
|
||||
These programs work under both the Unix FORK and SPROC models.
|
||||
|
||||
RUNNING THE PROGRAM:
|
||||
|
||||
To see how to run the program, please see the comment at the top of the
|
||||
file main.C, or run the application with the "-h" command line option.
|
||||
Five command line parameters can be specified, of which the ones which
|
||||
would normally be changed are the number of grid points in each dimension,
|
||||
and the number of processors. The number of grid points must be a
|
||||
(power of 2) + 2 in each dimension (e.g. 130, 258, etc.). The number of
|
||||
processors must be a power of 2. Timing information is printed out at
|
||||
the end of the program. The first timestep is considered part of the
|
||||
initialization phase of the program, and hence is not included in the
|
||||
"Total time without initialization."
|
||||
|
||||
BASE PROBLEM SIZE:
|
||||
|
||||
The base problem size for a machine with up to 64 processors is a 258x258 grid.
|
||||
The default values should be used for other parameters (except the number
|
||||
of processors, which can be varied). In addition, sample output files
|
||||
for the default parameters for each version of the code are contained in
|
||||
the file correct.out in each subdirectory.
|
||||
|
||||
DATA DISTRIBUTION:
|
||||
|
||||
Our "POSSIBLE ENHANCEMENT" comments in the source code tell where one
|
||||
might want to distribute data and how. Data distribution has an impact
|
||||
on performance on the Stanford DASH multiprocessor.
|
||||
|
16
splash2/codes/apps/ocean/contiguous_partitions/Makefile
Normal file
16
splash2/codes/apps/ocean/contiguous_partitions/Makefile
Normal file
|
@ -0,0 +1,16 @@
|
|||
TARGET = OCEAN
|
||||
OBJS = jacobcalc.o jacobcalc2.o laplacalc.o linkup.o main.o multi.o slave1.o slave2.o subblock.o
|
||||
|
||||
include ../../../Makefile.config
|
||||
|
||||
decs.h: decs.H
|
||||
jacobcalc.c: decs.h
|
||||
linkup.c: decs.h
|
||||
slave1.c: decs.h
|
||||
jacobcalc2.c: decs.h
|
||||
main.c: decs.h
|
||||
slave2.c: decs.h
|
||||
laplacalc.c: decs.h
|
||||
multi.c: decs.h
|
||||
subblock.c: decs.h
|
||||
|
40
splash2/codes/apps/ocean/contiguous_partitions/correct.out
Normal file
40
splash2/codes/apps/ocean/contiguous_partitions/correct.out
Normal file
|
@ -0,0 +1,40 @@
|
|||
shmid 0x2fa8 shmvaddr 0x500000
|
||||
|
||||
Ocean simulation with W-cycle multigrid solver
|
||||
Processors : 1
|
||||
Grid size : 258 x 258
|
||||
Grid resolution (meters) : 20000.00
|
||||
Time between relaxations (seconds) : 28800
|
||||
Error tolerance : 1e-07
|
||||
|
||||
MULTIGRID OUTPUTS
|
||||
iter 71, level 7, residual norm 8.00274594e-08, work = 33.875
|
||||
iter 31, level 7, residual norm 4.08062997e-08, work = 13.563
|
||||
iter 22, level 7, residual norm 5.94548243e-08, work = 9.438
|
||||
iter 12, level 7, residual norm 4.05573539e-08, work = 6.188
|
||||
iter 2, level 7, residual norm 8.20209761e-08, work = 2.000
|
||||
iter 5, level 7, residual norm 6.54258351e-08, work = 5.000
|
||||
iter 3, level 7, residual norm 7.23930444e-08, work = 3.000
|
||||
iter 12, level 7, residual norm 3.56346335e-08, work = 6.188
|
||||
iter 2, level 7, residual norm 5.93080936e-08, work = 2.000
|
||||
iter 4, level 7, residual norm 8.54596942e-08, work = 4.000
|
||||
iter 11, level 7, residual norm 3.70162668e-08, work = 6.125
|
||||
iter 13, level 7, residual norm 3.34750526e-08, work = 7.188
|
||||
iter 12, level 7, residual norm 2.45353138e-08, work = 6.188
|
||||
|
||||
PROCESS STATISTICS
|
||||
Total Multigrid Multigrid
|
||||
Proc Time Time Fraction
|
||||
0 50030404 20050068 0.401
|
||||
Avg 50030404 20050068 0.401
|
||||
Min 50030404 20050068 0.401
|
||||
Max 50030404 20050068 0.401
|
||||
|
||||
TIMING INFORMATION
|
||||
Start time : 1114891426
|
||||
Initialization finish time : 1152301729
|
||||
Overall finish time : 1202332135
|
||||
Total time with initialization : 87440709
|
||||
Total time without initialization : 50030406
|
||||
(excludes first timestep)
|
||||
|
228
splash2/codes/apps/ocean/contiguous_partitions/decs.H
Normal file
228
splash2/codes/apps/ocean/contiguous_partitions/decs.H
Normal file
|
@ -0,0 +1,228 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#define MASTER 0
|
||||
#define RED_ITER 0
|
||||
#define BLACK_ITER 1
|
||||
#define UP 0
|
||||
#define DOWN 1
|
||||
#define LEFT 2
|
||||
#define RIGHT 3
|
||||
#define UPLEFT 4
|
||||
#define UPRIGHT 5
|
||||
#define DOWNLEFT 6
|
||||
#define DOWNRIGHT 7
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
/* Holds the single double err_multi; an instance is reached through
   the extern pointer multi. */
struct multi_struct
{
   double err_multi;
};
|
||||
|
||||
extern struct multi_struct *multi;
|
||||
|
||||
/* Program-wide values reached through the extern pointer global.
   Members of one type are declared together; the member order is
   id, starttime, trackstart, psiai, psibi. */
struct global_struct
{
   long id, starttime, trackstart;
   double psiai, psibi;
};
|
||||
|
||||
extern struct global_struct *global;
|
||||
|
||||
extern double eig2;
|
||||
extern double ysca;
|
||||
extern long jmm1;
|
||||
extern double pi;
|
||||
extern double t0;
|
||||
|
||||
extern double ****psi;
|
||||
extern double ****psim;
|
||||
extern double ***psium;
|
||||
extern double ***psilm;
|
||||
extern double ***psib;
|
||||
extern double ***ga;
|
||||
extern double ***gb;
|
||||
extern double ****work1;
|
||||
extern double ***work2;
|
||||
extern double ***work3;
|
||||
extern double ****work4;
|
||||
extern double ****work5;
|
||||
extern double ***work6;
|
||||
extern double ****work7;
|
||||
extern double ****temparray;
|
||||
extern double ***tauz;
|
||||
extern double ***oldga;
|
||||
extern double ***oldgb;
|
||||
extern double *f;
|
||||
extern double ****q_multi;
|
||||
extern double ****rhs_multi;
|
||||
|
||||
struct locks_struct {
|
||||
LOCKDEC(idlock)
|
||||
LOCKDEC(psiailock)
|
||||
LOCKDEC(psibilock)
|
||||
LOCKDEC(donelock)
|
||||
LOCKDEC(error_lock)
|
||||
LOCKDEC(bar_lock)
|
||||
};
|
||||
|
||||
extern struct locks_struct *locks;
|
||||
|
||||
struct bars_struct {
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARDEC(iteration)
|
||||
BARDEC(gsudn)
|
||||
BARDEC(p_setup)
|
||||
BARDEC(p_redph)
|
||||
BARDEC(p_soln)
|
||||
BARDEC(p_subph)
|
||||
BARDEC(sl_prini)
|
||||
BARDEC(sl_psini)
|
||||
BARDEC(sl_onetime)
|
||||
BARDEC(sl_phase_1)
|
||||
BARDEC(sl_phase_2)
|
||||
BARDEC(sl_phase_3)
|
||||
BARDEC(sl_phase_4)
|
||||
BARDEC(sl_phase_5)
|
||||
BARDEC(sl_phase_6)
|
||||
BARDEC(sl_phase_7)
|
||||
BARDEC(sl_phase_8)
|
||||
BARDEC(sl_phase_9)
|
||||
BARDEC(sl_phase_10)
|
||||
BARDEC(error_barrier)
|
||||
#else
|
||||
BARDEC(barrier)
|
||||
#endif
|
||||
};
|
||||
|
||||
extern struct bars_struct *bars;
|
||||
|
||||
extern double factjacob;
|
||||
extern double factlap;
|
||||
|
||||
struct Global_Private {
|
||||
char pad[PAGE_SIZE];
|
||||
long *rel_num_x;
|
||||
long *rel_num_y;
|
||||
long *eist;
|
||||
long *ejst;
|
||||
long *oist;
|
||||
long *ojst;
|
||||
long *rlist;
|
||||
long *rljst;
|
||||
long *rlien;
|
||||
long *rljen;
|
||||
long rownum;
|
||||
long colnum;
|
||||
long neighbors[8];
|
||||
double multi_time;
|
||||
double total_time;
|
||||
};
|
||||
|
||||
extern struct Global_Private *gp;
|
||||
|
||||
extern double *i_int_coeff;
|
||||
extern double *j_int_coeff;
|
||||
extern long xprocs;
|
||||
extern long yprocs;
|
||||
|
||||
extern long numlev;
|
||||
extern long *imx;
|
||||
extern long *jmx;
|
||||
extern double *lev_res;
|
||||
extern double *lev_tol;
|
||||
extern double maxwork;
|
||||
extern long *xpts_per_proc;
|
||||
extern long *ypts_per_proc;
|
||||
extern long minlevel;
|
||||
extern double outday0;
|
||||
extern double outday1;
|
||||
extern double outday2;
|
||||
extern double outday3;
|
||||
|
||||
extern long nprocs;
|
||||
extern double h1;
|
||||
extern double h3;
|
||||
extern double h;
|
||||
extern double lf;
|
||||
extern double res;
|
||||
extern double dtau;
|
||||
extern double f0;
|
||||
extern double beta;
|
||||
extern double gpr;
|
||||
extern long im;
|
||||
extern long jm;
|
||||
extern long do_stats;
|
||||
extern long do_output;
|
||||
extern long *multi_times;
|
||||
extern long *total_times;
|
||||
|
||||
/*
|
||||
* jacobcalc.C
|
||||
*/
|
||||
void jacobcalc(double ***x, double ***y, double ***z, long pid, long firstrow, long lastrow, long firstcol, long lastcol);
|
||||
|
||||
/*
|
||||
* jacobcalc2.C
|
||||
*/
|
||||
void jacobcalc2(double ****x, double ****y, double ****z, long psiindex, long pid, long firstrow, long lastrow, long firstcol, long lastcol);
|
||||
|
||||
/*
|
||||
* laplacalc.C
|
||||
*/
|
||||
void laplacalc(long procid, double ****x, double ****z, long psiindex, long firstrow, long lastrow, long firstcol, long lastcol);
|
||||
|
||||
/*
|
||||
* linkup.C
|
||||
*/
|
||||
void link_all(void);
|
||||
void linkup(double **row_ptr);
|
||||
void link_multi(void);
|
||||
|
||||
/*
|
||||
* main.C
|
||||
*/
|
||||
long log_2(long number);
|
||||
void printerr(char *s);
|
||||
|
||||
/*
|
||||
* multi.C
|
||||
*/
|
||||
void multig(long my_id);
|
||||
void relax(long k, double *err, long color, long my_num);
|
||||
void rescal(long kf, long my_num);
|
||||
void intadd(long kc, long my_num);
|
||||
void putz(long k, long my_num);
|
||||
void copy_borders(long k, long pid);
|
||||
void copy_rhs_borders(long k, long procid);
|
||||
void copy_red(long k, long procid);
|
||||
void copy_black(long k, long procid);
|
||||
|
||||
/*
|
||||
* slave1.C
|
||||
*/
|
||||
void slave(void);
|
||||
|
||||
/*
|
||||
* slave2.C
|
||||
*/
|
||||
void slave2(long procid, long firstrow, long lastrow, long numrows, long firstcol, long lastcol, long numcols);
|
||||
|
||||
/*
|
||||
* subblock.C
|
||||
*/
|
||||
void subblock(void);
|
352
splash2/codes/apps/ocean/contiguous_partitions/jacobcalc.C
Normal file
352
splash2/codes/apps/ocean/contiguous_partitions/jacobcalc.C
Normal file
|
@ -0,0 +1,352 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/* Does the arakawa jacobian calculation (of the x and y matrices,
|
||||
putting the results in the z matrix) for a subblock. */
|
||||
|
||||
EXTERN_ENV
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include "decs.h"
|
||||
|
||||
void jacobcalc(double ***x, double ***y, double ***z, long pid, long firstrow, long lastrow, long firstcol, long lastcol)
|
||||
{
|
||||
double f1;
|
||||
double f2;
|
||||
double f3;
|
||||
double f4;
|
||||
double f5;
|
||||
double f6;
|
||||
double f7;
|
||||
double f8;
|
||||
long iindex;
|
||||
long indexp1;
|
||||
long indexm1;
|
||||
long im1;
|
||||
long ip1;
|
||||
long i;
|
||||
long j;
|
||||
long jj;
|
||||
double **t2a;
|
||||
double **t2b;
|
||||
double **t2c;
|
||||
double *t1a;
|
||||
double *t1b;
|
||||
double *t1c;
|
||||
double *t1d;
|
||||
double *t1e;
|
||||
double *t1f;
|
||||
double *t1g;
|
||||
|
||||
t2a = (double **) z[pid];
|
||||
if ((gp[pid].neighbors[UP] == -1) && (gp[pid].neighbors[LEFT] == -1)) {
|
||||
t2a[0][0]=0.0;
|
||||
}
|
||||
if ((gp[pid].neighbors[DOWN] == -1) && (gp[pid].neighbors[LEFT] == -1)) {
|
||||
t2a[im-1][0]=0.0;
|
||||
}
|
||||
if ((gp[pid].neighbors[UP] == -1) && (gp[pid].neighbors[RIGHT] == -1)) {
|
||||
t2a[0][jm-1]=0.0;
|
||||
}
|
||||
if ((gp[pid].neighbors[DOWN] == -1) && (gp[pid].neighbors[RIGHT] == -1)) {
|
||||
t2a[im-1][jm-1]=0.0;
|
||||
}
|
||||
|
||||
t2a = (double **) x[pid];
|
||||
jj = gp[pid].neighbors[UPLEFT];
|
||||
if (jj != -1) {
|
||||
t2a[0][0]=x[jj][im-2][jm-2];
|
||||
}
|
||||
jj = gp[pid].neighbors[UPRIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1]=x[jj][im-2][1];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWNLEFT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0]=x[jj][1][jm-2];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWNRIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1]=x[jj][1][1];
|
||||
}
|
||||
|
||||
t2a = (double **) y[pid];
|
||||
jj = gp[pid].neighbors[UPLEFT];
|
||||
if (jj != -1) {
|
||||
t2a[0][0]=y[jj][im-2][jm-2];
|
||||
}
|
||||
jj = gp[pid].neighbors[UPRIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1]=y[jj][im-2][1];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWNLEFT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0]=y[jj][1][jm-2];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWNRIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1]=y[jj][1][1];
|
||||
}
|
||||
|
||||
t2a = (double **) x[pid];
|
||||
if (gp[pid].neighbors[UP] == -1) {
|
||||
jj = gp[pid].neighbors[LEFT];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = x[jj][0][jm-2];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = x[jj][1][0];
|
||||
}
|
||||
}
|
||||
jj = gp[pid].neighbors[RIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = x[jj][0][1];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = x[jj][1][jm-1];
|
||||
}
|
||||
}
|
||||
} else if (gp[pid].neighbors[DOWN] == -1) {
|
||||
jj = gp[pid].neighbors[LEFT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = x[jj][im-1][jm-2];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = x[jj][im-2][0];
|
||||
}
|
||||
}
|
||||
jj = gp[pid].neighbors[RIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = x[jj][im-1][1];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = x[jj][im-2][jm-1];
|
||||
}
|
||||
}
|
||||
} else if (gp[pid].neighbors[LEFT] == -1) {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = x[jj][im-2][0];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = x[jj][1][0];
|
||||
}
|
||||
} else if (gp[pid].neighbors[RIGHT] == -1) {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = x[jj][im-2][jm-1];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = x[jj][1][jm-1];
|
||||
}
|
||||
}
|
||||
|
||||
t2a = (double **) y[pid];
|
||||
if (gp[pid].neighbors[UP] == -1) {
|
||||
jj = gp[pid].neighbors[LEFT];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = y[jj][0][jm-2];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = y[jj][1][0];
|
||||
}
|
||||
}
|
||||
jj = gp[pid].neighbors[RIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = y[jj][0][1];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = y[jj][1][jm-1];
|
||||
}
|
||||
}
|
||||
} else if (gp[pid].neighbors[DOWN] == -1) {
|
||||
jj = gp[pid].neighbors[LEFT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = y[jj][im-1][jm-2];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = y[jj][im-2][0];
|
||||
}
|
||||
}
|
||||
jj = gp[pid].neighbors[RIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = y[jj][im-1][1];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = y[jj][im-2][jm-1];
|
||||
}
|
||||
}
|
||||
} else if (gp[pid].neighbors[LEFT] == -1) {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = y[jj][im-2][0];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = y[jj][1][0];
|
||||
}
|
||||
} else if (gp[pid].neighbors[RIGHT] == -1) {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = y[jj][im-2][jm-1];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = y[jj][1][jm-1];
|
||||
}
|
||||
}
|
||||
|
||||
j = gp[pid].neighbors[UP];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) y[j][im-2];
|
||||
for (i=1;i<=lastcol;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[DOWN];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
t1b = (double *) y[j][1];
|
||||
for (i=1;i<=lastcol;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[LEFT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) y[j];
|
||||
for (i=1;i<=lastrow;i++) {
|
||||
t2a[i][0] = t2b[i][jm-2];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[RIGHT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) y[j];
|
||||
for (i=1;i<=lastrow;i++) {
|
||||
t2a[i][jm-1] = t2b[i][1];
|
||||
}
|
||||
}
|
||||
|
||||
t2a = (double **) x[pid];
|
||||
j = gp[pid].neighbors[UP];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) x[j][im-2];
|
||||
for (i=1;i<=lastcol;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[DOWN];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
t1b = (double *) x[j][1];
|
||||
for (i=1;i<=lastcol;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[LEFT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) x[j];
|
||||
for (i=1;i<=lastrow;i++) {
|
||||
t2a[i][0] = t2b[i][jm-2];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[RIGHT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) x[j];
|
||||
for (i=1;i<=lastrow;i++) {
|
||||
t2a[i][jm-1] = t2b[i][1];
|
||||
}
|
||||
}
|
||||
|
||||
t2a = (double **) x[pid];
|
||||
t2b = (double **) y[pid];
|
||||
t2c = (double **) z[pid];
|
||||
for (i=firstrow;i<=lastrow;i++) {
|
||||
ip1 = i+1;
|
||||
im1 = i-1;
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
t1c = (double *) t2c[i];
|
||||
t1d = (double *) t2b[ip1];
|
||||
t1e = (double *) t2b[im1];
|
||||
t1f = (double *) t2a[ip1];
|
||||
t1g = (double *) t2a[im1];
|
||||
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
indexp1 = iindex+1;
|
||||
indexm1 = iindex-1;
|
||||
f1 = (t1b[indexm1]+t1d[indexm1]-
|
||||
t1b[indexp1]-t1d[indexp1])*
|
||||
(t1f[iindex]-t1a[iindex]);
|
||||
f2 = (t1e[indexm1]+t1b[indexm1]-
|
||||
t1e[indexp1]-t1b[indexp1])*
|
||||
(t1a[iindex]-t1g[iindex]);
|
||||
f3 = (t1d[iindex]+t1d[indexp1]-
|
||||
t1e[iindex]-t1e[indexp1])*
|
||||
(t1a[indexp1]-t1a[iindex]);
|
||||
f4 = (t1d[indexm1]+t1d[iindex]-
|
||||
t1e[indexm1]-t1e[iindex])*
|
||||
(t1a[iindex]-t1a[indexm1]);
|
||||
f5 = (t1d[iindex]-t1b[indexp1])*
|
||||
(t1f[indexp1]-t1a[iindex]);
|
||||
f6 = (t1b[indexm1]-t1e[iindex])*
|
||||
(t1a[iindex]-t1g[indexm1]);
|
||||
f7 = (t1b[indexp1]-t1e[iindex])*
|
||||
(t1g[indexp1]-t1a[iindex]);
|
||||
f8 = (t1d[iindex]-t1b[indexm1])*
|
||||
(t1a[iindex]-t1f[indexm1]);
|
||||
|
||||
t1c[iindex] = factjacob*(f1+f2+f3+f4+f5+f6+f7+f8);
|
||||
}
|
||||
}
|
||||
|
||||
if (gp[pid].neighbors[UP] == -1) {
|
||||
t1c = (double *) t2c[0];
|
||||
for (j=firstcol;j<=lastcol;j++) {
|
||||
t1c[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[pid].neighbors[DOWN] == -1) {
|
||||
t1c = (double *) t2c[im-1];
|
||||
for (j=firstcol;j<=lastcol;j++) {
|
||||
t1c[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[pid].neighbors[LEFT] == -1) {
|
||||
for (j=firstrow;j<=lastrow;j++) {
|
||||
t2c[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[pid].neighbors[RIGHT] == -1) {
|
||||
for (j=firstrow;j<=lastrow;j++) {
|
||||
t2c[j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
354
splash2/codes/apps/ocean/contiguous_partitions/jacobcalc2.C
Normal file
354
splash2/codes/apps/ocean/contiguous_partitions/jacobcalc2.C
Normal file
|
@ -0,0 +1,354 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/* Does the arakawa jacobian calculation (of the x and y matrices,
|
||||
putting the results in the z matrix) for a subblock. */
|
||||
|
||||
EXTERN_ENV
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include "decs.h"
|
||||
|
||||
void jacobcalc2(double ****x, double ****y, double ****z, long psiindex, long pid, long firstrow, long lastrow, long firstcol, long lastcol)
|
||||
{
|
||||
double f1;
|
||||
double f2;
|
||||
double f3;
|
||||
double f4;
|
||||
double f5;
|
||||
double f6;
|
||||
double f7;
|
||||
double f8;
|
||||
long iindex;
|
||||
long indexp1;
|
||||
long indexm1;
|
||||
long im1;
|
||||
long ip1;
|
||||
long i;
|
||||
long j;
|
||||
long jj;
|
||||
double **t2a;
|
||||
double **t2b;
|
||||
double **t2c;
|
||||
double *t1a;
|
||||
double *t1b;
|
||||
double *t1c;
|
||||
double *t1d;
|
||||
double *t1e;
|
||||
double *t1f;
|
||||
double *t1g;
|
||||
|
||||
t2a = z[pid][psiindex];
|
||||
if ((gp[pid].neighbors[UP] == -1) && (gp[pid].neighbors[LEFT] == -1)) {
|
||||
t2a[0][0]=0.0;
|
||||
}
|
||||
if ((gp[pid].neighbors[DOWN] == -1) && (gp[pid].neighbors[LEFT] == -1)) {
|
||||
t2a[im-1][0]=0.0;
|
||||
}
|
||||
if ((gp[pid].neighbors[UP] == -1) && (gp[pid].neighbors[RIGHT] == -1)) {
|
||||
t2a[0][jm-1]=0.0;
|
||||
}
|
||||
if ((gp[pid].neighbors[DOWN] == -1) && (gp[pid].neighbors[RIGHT] == -1)) {
|
||||
t2a[im-1][jm-1]=0.0;
|
||||
}
|
||||
|
||||
t2a = x[pid][psiindex];
|
||||
jj = gp[pid].neighbors[UPLEFT];
|
||||
if (jj != -1) {
|
||||
t2a[0][0]=x[jj][psiindex][im-2][jm-2];
|
||||
}
|
||||
jj = gp[pid].neighbors[UPRIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1]=x[jj][psiindex][im-2][1];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWNLEFT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0]=x[jj][psiindex][1][jm-2];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWNRIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1]=x[jj][psiindex][1][1];
|
||||
}
|
||||
|
||||
t2a = y[pid][psiindex];
|
||||
jj = gp[pid].neighbors[UPLEFT];
|
||||
if (jj != -1) {
|
||||
t2a[0][0]=y[jj][psiindex][im-2][jm-2];
|
||||
}
|
||||
jj = gp[pid].neighbors[UPRIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1]=y[jj][psiindex][im-2][1];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWNLEFT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0]=y[jj][psiindex][1][jm-2];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWNRIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1]=y[jj][psiindex][1][1];
|
||||
}
|
||||
|
||||
t2a = x[pid][psiindex];
|
||||
if (gp[pid].neighbors[UP] == -1) {
|
||||
jj = gp[pid].neighbors[LEFT];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = x[jj][psiindex][0][jm-2];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = x[jj][psiindex][1][0];
|
||||
}
|
||||
}
|
||||
jj = gp[pid].neighbors[RIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = x[jj][psiindex][0][1];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = x[jj][psiindex][1][jm-1];
|
||||
}
|
||||
}
|
||||
} else if (gp[pid].neighbors[DOWN] == -1) {
|
||||
jj = gp[pid].neighbors[LEFT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = x[jj][psiindex][im-1][jm-2];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = x[jj][psiindex][im-2][0];
|
||||
}
|
||||
}
|
||||
jj = gp[pid].neighbors[RIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = x[jj][psiindex][im-1][1];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = x[jj][psiindex][im-2][jm-1];
|
||||
}
|
||||
}
|
||||
} else if (gp[pid].neighbors[LEFT] == -1) {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = x[jj][psiindex][im-2][0];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = x[jj][psiindex][1][0];
|
||||
}
|
||||
} else if (gp[pid].neighbors[RIGHT] == -1) {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = x[jj][psiindex][im-2][jm-1];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = x[jj][psiindex][1][jm-1];
|
||||
}
|
||||
}
|
||||
|
||||
t2a = y[pid][psiindex];
|
||||
if (gp[pid].neighbors[UP] == -1) {
|
||||
jj = gp[pid].neighbors[LEFT];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = y[jj][psiindex][0][jm-2];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = y[jj][psiindex][1][0];
|
||||
}
|
||||
}
|
||||
jj = gp[pid].neighbors[RIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = y[jj][psiindex][0][1];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = y[jj][psiindex][1][jm-1];
|
||||
}
|
||||
}
|
||||
} else if (gp[pid].neighbors[DOWN] == -1) {
|
||||
jj = gp[pid].neighbors[LEFT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = y[jj][psiindex][im-1][jm-2];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = y[jj][psiindex][im-2][0];
|
||||
}
|
||||
}
|
||||
jj = gp[pid].neighbors[RIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = y[jj][psiindex][im-1][1];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = y[jj][psiindex][im-2][jm-1];
|
||||
}
|
||||
}
|
||||
} else if (gp[pid].neighbors[LEFT] == -1) {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = y[jj][psiindex][im-2][0];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = y[jj][psiindex][1][0];
|
||||
}
|
||||
} else if (gp[pid].neighbors[RIGHT] == -1) {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = y[jj][psiindex][im-2][jm-1];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = y[jj][psiindex][1][jm-1];
|
||||
}
|
||||
}
|
||||
|
||||
t2a = y[pid][psiindex];
|
||||
j = gp[pid].neighbors[UP];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) y[j][psiindex][im-2];
|
||||
for (i=1;i<=lastcol;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[DOWN];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
t1b = (double *) y[j][psiindex][1];
|
||||
for (i=1;i<=lastcol;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[LEFT];
|
||||
if (j != -1) {
|
||||
t2b = y[j][psiindex];
|
||||
for (i=1;i<=lastrow;i++) {
|
||||
t2a[i][0] = t2b[i][jm-2];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[RIGHT];
|
||||
if (j != -1) {
|
||||
t2b = y[j][psiindex];
|
||||
for (i=1;i<=lastrow;i++) {
|
||||
t2a[i][jm-1] = t2b[i][1];
|
||||
}
|
||||
}
|
||||
|
||||
t2a = x[pid][psiindex];
|
||||
j = gp[pid].neighbors[UP];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) x[j][psiindex][im-2];
|
||||
for (i=1;i<=lastcol;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[DOWN];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
t1b = (double *) x[j][psiindex][1];
|
||||
for (i=1;i<=lastcol;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[LEFT];
|
||||
if (j != -1) {
|
||||
t2b = x[j][psiindex];
|
||||
for (i=1;i<=lastrow;i++) {
|
||||
t2a[i][0] = t2b[i][jm-2];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[RIGHT];
|
||||
if (j != -1) {
|
||||
t2b = x[j][psiindex];
|
||||
for (i=1;i<=lastrow;i++) {
|
||||
t2a[i][jm-1] = t2b[i][1];
|
||||
}
|
||||
}
|
||||
|
||||
t2a = x[pid][psiindex];
|
||||
t2b = y[pid][psiindex];
|
||||
t2c = z[pid][psiindex];
|
||||
for (i=firstrow;i<=lastrow;i++) {
|
||||
ip1 = i+1;
|
||||
im1 = i-1;
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
t1c = (double *) t2c[i];
|
||||
t1d = (double *) t2b[ip1];
|
||||
t1e = (double *) t2b[im1];
|
||||
t1f = (double *) t2a[ip1];
|
||||
t1g = (double *) t2a[im1];
|
||||
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
indexp1 = iindex+1;
|
||||
indexm1 = iindex-1;
|
||||
f1 = (t1b[indexm1]+t1d[indexm1]-
|
||||
t1b[indexp1]-t1d[indexp1])*
|
||||
(t1f[iindex]-t1a[iindex]);
|
||||
f2 = (t1e[indexm1]+t1b[indexm1]-
|
||||
t1e[indexp1]-t1b[indexp1])*
|
||||
(t1a[iindex]-t1g[iindex]);
|
||||
f3 = (t1d[iindex]+t1d[indexp1]-
|
||||
t1e[iindex]-t1e[indexp1])*
|
||||
(t1a[indexp1]-t1a[iindex]);
|
||||
f4 = (t1d[indexm1]+t1d[iindex]-
|
||||
t1e[indexm1]-t1e[iindex])*
|
||||
(t1a[iindex]-t1a[indexm1]);
|
||||
f5 = (t1d[iindex]-t1b[indexp1])*
|
||||
(t1f[indexp1]-t1a[iindex]);
|
||||
f6 = (t1b[indexm1]-t1e[iindex])*
|
||||
(t1a[iindex]-t1g[indexm1]);
|
||||
f7 = (t1b[indexp1]-t1e[iindex])*
|
||||
(t1g[indexp1]-t1a[iindex]);
|
||||
f8 = (t1d[iindex]-t1b[indexm1])*
|
||||
(t1a[iindex]-t1f[indexm1]);
|
||||
|
||||
t1c[iindex] = factjacob*(f1+f2+f3+f4+f5+f6+f7+f8);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (gp[pid].neighbors[UP] == -1) {
|
||||
t1c = (double *) t2c[0];
|
||||
for (j=firstcol;j<=lastcol;j++) {
|
||||
t1c[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[pid].neighbors[DOWN] == -1) {
|
||||
t1c = (double *) t2c[im-1];
|
||||
for (j=firstcol;j<=lastcol;j++) {
|
||||
t1c[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[pid].neighbors[LEFT] == -1) {
|
||||
for (j=firstrow;j<=lastrow;j++) {
|
||||
t2c[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[pid].neighbors[RIGHT] == -1) {
|
||||
for (j=firstrow;j<=lastrow;j++) {
|
||||
t2c[j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
115
splash2/codes/apps/ocean/contiguous_partitions/laplacalc.C
Normal file
115
splash2/codes/apps/ocean/contiguous_partitions/laplacalc.C
Normal file
|
@ -0,0 +1,115 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/* Performs the laplacian calculation for a subblock */
|
||||
|
||||
EXTERN_ENV
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include "decs.h"
|
||||
|
||||
void laplacalc(long procid, double ****x, double ****z, long psiindex, long firstrow, long lastrow, long firstcol, long lastcol)
|
||||
{
|
||||
long iindex;
|
||||
long indexp1;
|
||||
long indexm1;
|
||||
long ip1;
|
||||
long im1;
|
||||
long i;
|
||||
long j;
|
||||
double **t2a;
|
||||
double **t2b;
|
||||
double *t1a;
|
||||
double *t1b;
|
||||
double *t1c;
|
||||
double *t1d;
|
||||
|
||||
t2a = (double **) x[procid][psiindex];
|
||||
j = gp[procid].neighbors[UP];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) x[j][psiindex][im-2];
|
||||
for (i=1;i<=lastcol;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[procid].neighbors[DOWN];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
t1b = (double *) x[j][psiindex][1];
|
||||
for (i=1;i<=lastcol;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[procid].neighbors[LEFT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) x[j][psiindex];
|
||||
for (i=1;i<=lastrow;i++) {
|
||||
t2a[i][0] = t2b[i][jm-2];
|
||||
}
|
||||
}
|
||||
j = gp[procid].neighbors[RIGHT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) x[j][psiindex];
|
||||
for (i=1;i<=lastrow;i++) {
|
||||
t2a[i][jm-1] = t2b[i][1];
|
||||
}
|
||||
}
|
||||
|
||||
t2a = (double **) x[procid][psiindex];
|
||||
t2b = (double **) z[procid][psiindex];
|
||||
for (i=firstrow;i<=lastrow;i++) {
|
||||
ip1 = i+1;
|
||||
im1 = i-1;
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
t1c = (double *) t2a[ip1];
|
||||
t1d = (double *) t2a[im1];
|
||||
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
indexp1 = iindex+1;
|
||||
indexm1 = iindex-1;
|
||||
t1b[iindex] = factlap*(t1c[iindex]+
|
||||
t1d[iindex]+t1a[indexp1]+
|
||||
t1a[indexm1]-4.*t1a[iindex]);
|
||||
}
|
||||
}
|
||||
|
||||
if (gp[procid].neighbors[UP] == -1) {
|
||||
t1b = (double *) t2b[0];
|
||||
for (j=firstcol;j<=lastcol;j++) {
|
||||
t1b[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[DOWN] == -1) {
|
||||
t1b = (double *) t2b[im-1];
|
||||
for (j=firstcol;j<=lastcol;j++) {
|
||||
t1b[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] == -1) {
|
||||
for (j=firstrow;j<=lastrow;j++) {
|
||||
t2b[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
||||
for (j=firstrow;j<=lastrow;j++) {
|
||||
t2b[j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
BIN
splash2/codes/apps/ocean/contiguous_partitions/libpthread.a
Normal file
BIN
splash2/codes/apps/ocean/contiguous_partitions/libpthread.a
Normal file
Binary file not shown.
196
splash2/codes/apps/ocean/contiguous_partitions/linkup.C
Normal file
196
splash2/codes/apps/ocean/contiguous_partitions/linkup.C
Normal file
|
@ -0,0 +1,196 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/* Set all the pointers to the proper locations for the q_multi and
|
||||
rhs_multi data structures */
|
||||
|
||||
EXTERN_ENV
|
||||
|
||||
#include "decs.h"
|
||||
|
||||
void link_all()
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
|
||||
for (j=0;j<nprocs;j++) {
|
||||
linkup(psium[j]);
|
||||
linkup(psilm[j]);
|
||||
linkup(psib[j]);
|
||||
linkup(ga[j]);
|
||||
linkup(gb[j]);
|
||||
linkup(work2[j]);
|
||||
linkup(work3[j]);
|
||||
linkup(work6[j]);
|
||||
linkup(tauz[j]);
|
||||
linkup(oldga[j]);
|
||||
linkup(oldgb[j]);
|
||||
for (i=0;i<=1;i++) {
|
||||
linkup(psi[j][i]);
|
||||
linkup(psim[j][i]);
|
||||
linkup(work1[j][i]);
|
||||
linkup(work4[j][i]);
|
||||
linkup(work5[j][i]);
|
||||
linkup(work7[j][i]);
|
||||
linkup(temparray[j][i]);
|
||||
}
|
||||
}
|
||||
link_multi();
|
||||
}
|
||||
|
||||
void linkup(double **row_ptr)
|
||||
{
|
||||
long i;
|
||||
double *a;
|
||||
double **row;
|
||||
double **y;
|
||||
long x_part;
|
||||
long y_part;
|
||||
|
||||
x_part = (jm-2)/xprocs + 2;
|
||||
y_part = (im-2)/yprocs + 2;
|
||||
row = row_ptr;
|
||||
y = row + y_part;
|
||||
a = (double *) y;
|
||||
for (i=0;i<y_part;i++) {
|
||||
*row = (double *) a;
|
||||
row++;
|
||||
a += x_part;
|
||||
}
|
||||
}
|
||||
|
||||
void link_multi()
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long l;
|
||||
double *a;
|
||||
double **row;
|
||||
double **y;
|
||||
unsigned long z;
|
||||
unsigned long zz;
|
||||
long x_part;
|
||||
long y_part;
|
||||
unsigned long d_size;
|
||||
|
||||
z = ((unsigned long) q_multi + nprocs*sizeof(double ***));
|
||||
|
||||
if (nprocs%2 == 1) { /* To make sure that the actual data
|
||||
starts double word aligned, add an extra
|
||||
pointer */
|
||||
z += sizeof(double ***);
|
||||
}
|
||||
|
||||
d_size = numlev*sizeof(double **);
|
||||
if (numlev%2 == 1) { /* To make sure that the actual data
|
||||
starts double word aligned, add an extra
|
||||
pointer */
|
||||
d_size += sizeof(double **);
|
||||
}
|
||||
for (i=0;i<numlev;i++) {
|
||||
d_size += ((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
||||
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
||||
}
|
||||
for (i=0;i<nprocs;i++) {
|
||||
q_multi[i] = (double ***) z;
|
||||
z += d_size;
|
||||
}
|
||||
for (j=0;j<nprocs;j++) {
|
||||
zz = (unsigned long) q_multi[j];
|
||||
zz += numlev*sizeof(double **);
|
||||
if (numlev%2 == 1) { /* To make sure that the actual data
|
||||
starts double word aligned, add an extra
|
||||
pointer */
|
||||
zz += sizeof(double **);
|
||||
}
|
||||
for (i=0;i<numlev;i++) {
|
||||
d_size = ((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
||||
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
||||
q_multi[j][i] = (double **) zz;
|
||||
zz += d_size;
|
||||
}
|
||||
}
|
||||
|
||||
for (l=0;l<numlev;l++) {
|
||||
x_part = (jmx[l]-2)/xprocs + 2;
|
||||
y_part = (imx[l]-2)/yprocs + 2;
|
||||
for (j=0;j<nprocs;j++) {
|
||||
row = q_multi[j][l];
|
||||
y = row + y_part;
|
||||
a = (double *) y;
|
||||
for (i=0;i<y_part;i++) {
|
||||
*row = (double *) a;
|
||||
row++;
|
||||
a += x_part;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
z = ((unsigned long) rhs_multi + nprocs*sizeof(double ***));
|
||||
if (nprocs%2 == 1) { /* To make sure that the actual data
|
||||
starts double word aligned, add an extra
|
||||
pointer */
|
||||
z += sizeof(double ***);
|
||||
}
|
||||
|
||||
d_size = numlev*sizeof(double **);
|
||||
if (numlev%2 == 1) { /* To make sure that the actual data
|
||||
starts double word aligned, add an extra
|
||||
pointer */
|
||||
d_size += sizeof(double **);
|
||||
}
|
||||
for (i=0;i<numlev;i++) {
|
||||
d_size += ((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
||||
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
||||
}
|
||||
for (i=0;i<nprocs;i++) {
|
||||
rhs_multi[i] = (double ***) z;
|
||||
z += d_size;
|
||||
}
|
||||
for (j=0;j<nprocs;j++) {
|
||||
zz = (unsigned long) rhs_multi[j];
|
||||
zz += numlev*sizeof(double **);
|
||||
if (numlev%2 == 1) { /* To make sure that the actual data
|
||||
starts double word aligned, add an extra
|
||||
pointer */
|
||||
zz += sizeof(double **);
|
||||
}
|
||||
for (i=0;i<numlev;i++) {
|
||||
d_size = ((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
||||
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
||||
rhs_multi[j][i] = (double **) zz;
|
||||
zz += d_size;
|
||||
}
|
||||
}
|
||||
|
||||
for (l=0;l<numlev;l++) {
|
||||
x_part = (jmx[l]-2)/xprocs + 2;
|
||||
y_part = (imx[l]-2)/yprocs + 2;
|
||||
for (j=0;j<nprocs;j++) {
|
||||
row = rhs_multi[j][l];
|
||||
y = row + y_part;
|
||||
a = (double *) y;
|
||||
for (i=0;i<y_part;i++) {
|
||||
*row = (double *) a;
|
||||
row++;
|
||||
a += x_part;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
BIN
splash2/codes/apps/ocean/contiguous_partitions/m5op_x86.o
Normal file
BIN
splash2/codes/apps/ocean/contiguous_partitions/m5op_x86.o
Normal file
Binary file not shown.
566
splash2/codes/apps/ocean/contiguous_partitions/main.C
Normal file
566
splash2/codes/apps/ocean/contiguous_partitions/main.C
Normal file
|
@ -0,0 +1,566 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/*************************************************************************/
|
||||
/* */
|
||||
/* SPLASH Ocean Code */
|
||||
/* */
|
||||
/* This application studies the role of eddy and boundary currents in */
|
||||
/* influencing large-scale ocean movements. This implementation uses */
|
||||
/* dynamically allocated four-dimensional arrays for grid data storage. */
|
||||
/* */
|
||||
/* Command line options: */
|
||||
/* */
|
||||
/* -nN : Simulate NxN ocean. N must be (power of 2)+2. */
|
||||
/* -pP : P = number of processors. P must be power of 2. */
|
||||
/* -eE : E = error tolerance for iterative relaxation. */
|
||||
/* -rR : R = distance between grid points in meters. */
|
||||
/* -tT : T = timestep in seconds. */
|
||||
/* -s : Print timing statistics. */
|
||||
/* -o : Print out relaxation residual values. */
|
||||
/* -h : Print out command line options. */
|
||||
/* */
|
||||
/* Default: OCEAN -n258 -p1 -e1e-7 -r20000.0 -t28800.0 */
|
||||
/* */
|
||||
/* NOTE: This code works under both the FORK and SPROC models. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
MAIN_ENV
|
||||
|
||||
#define DEFAULT_N 258
|
||||
#define DEFAULT_P 1
|
||||
#define DEFAULT_E 1e-7
|
||||
#define DEFAULT_T 28800.0
|
||||
#define DEFAULT_R 20000.0
|
||||
#define UP 0
|
||||
#define DOWN 1
|
||||
#define LEFT 2
|
||||
#define RIGHT 3
|
||||
#define UPLEFT 4
|
||||
#define UPRIGHT 5
|
||||
#define DOWNLEFT 6
|
||||
#define DOWNRIGHT 7
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include "decs.h"
|
||||
|
||||
/* Shared control structures; main allocates one instance of each. */
struct multi_struct *multi;
|
||||
struct global_struct *global;
|
||||
struct locks_struct *locks;
|
||||
struct bars_struct *bars;
|
||||
|
||||
/* Per-processor grid storage. The first subscript is the processor id */
/* (main allocates nprocs entries). For the four-star arrays main */
/* allocates two sub-grids per processor; each sub-grid is one block */
/* holding y_part row pointers plus x_part*y_part doubles. */
double ****psi;
|
||||
double ****psim;
|
||||
double ***psium;
|
||||
double ***psilm;
|
||||
double ***psib;
|
||||
double ***ga;
|
||||
double ***gb;
|
||||
double ****work1;
|
||||
double ***work2;
|
||||
double ***work3;
|
||||
double ****work4;
|
||||
double ****work5;
|
||||
double ***work6;
|
||||
double ****work7;
|
||||
double ****temparray;
|
||||
double ***tauz;
|
||||
double ***oldga;
|
||||
double ***oldgb;
|
||||
/* Array of im doubles allocated in main. */
double *f;
|
||||
/* Multigrid unknowns and right-hand sides, subscripted as */
/* [processor][level][row][column] by the multigrid routines. */
double ****q_multi;
|
||||
double ****rhs_multi;
|
||||
|
||||
/* Number of processors (-p option). */
long nprocs = DEFAULT_P;
|
||||
/* Fixed model parameters. main reads h, h1, h3, f0 and gpr when it */
/* computes eig2. NOTE(review): physical meaning not shown here - confirm. */
double h1 = 1000.0;
|
||||
double h3 = 4000.0;
|
||||
double h = 5000.0;
|
||||
double lf = -5.12e11;
|
||||
/* Grid spacing in meters (-r option). */
double res = DEFAULT_R;
|
||||
/* Timestep in seconds (-t option). */
double dtau = DEFAULT_T;
|
||||
double f0 = 8.3e-5;
|
||||
double beta = 2.0e-11;
|
||||
double gpr = 0.02;
|
||||
/* Grid dimensions. im comes from -n and main copies it to jm; just */
/* before the workers start, main overwrites both with the size of one */
/* processor partition including its border. */
long im = DEFAULT_N;
|
||||
long jm;
|
||||
/* Error tolerance for the relaxation (-e option). */
double tolerance = DEFAULT_E;
|
||||
/* Derived constants, all computed in main before the workers start. */
double eig2;
|
||||
double ysca;
|
||||
long jmm1;
|
||||
double pi;
|
||||
double t0 = 0.5e-4 ;
|
||||
double outday0 = 1.0;
|
||||
double outday1 = 2.0;
|
||||
double outday2 = 2.0;
|
||||
double outday3 = 2.0;
|
||||
double factjacob;
|
||||
double factlap;
|
||||
/* Number of multigrid levels, followed by the per-level tables (numlev */
/* entries each): grid dimensions, grid spacing and convergence */
/* tolerance. Entry numlev-1 is the finest level. */
long numlev;
|
||||
long *imx;
|
||||
long *jmx;
|
||||
double *lev_res;
|
||||
double *lev_tol;
|
||||
/* Work limit; multig aborts the run once its work counter exceeds it. */
double maxwork = 10000.0;
|
||||
|
||||
/* Per-processor private bookkeeping; main allocates nprocs+1 entries. */
struct Global_Private *gp;
|
||||
|
||||
/* Per-level interpolation coefficients: 1/(imx[i]-1) and 1/(jmx[i]-1). */
double *i_int_coeff;
|
||||
double *j_int_coeff;
|
||||
/* Processor grid found by main: xprocs*yprocs equals nprocs. */
long xprocs;
|
||||
long yprocs;
|
||||
/* Interior points owned by one processor at each level. */
long *xpts_per_proc;
|
||||
long *ypts_per_proc;
|
||||
/* multig only moves to a coarser grid while the level is above this. */
long minlevel;
|
||||
/* Flags toggled by the -s and -o options. */
long do_stats = 0;
|
||||
long do_output = 0;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long k;
|
||||
long x_part;
|
||||
long y_part;
|
||||
long d_size;
|
||||
long itemp;
|
||||
long jtemp;
|
||||
double procsqrt;
|
||||
long temp = 0;
|
||||
double min_total;
|
||||
double max_total;
|
||||
double avg_total;
|
||||
double min_multi;
|
||||
double max_multi;
|
||||
double avg_multi;
|
||||
double min_frac;
|
||||
double max_frac;
|
||||
double avg_frac;
|
||||
long ch;
|
||||
extern char *optarg;
|
||||
unsigned long computeend;
|
||||
unsigned long start;
|
||||
|
||||
CLOCK(start)
|
||||
|
||||
while ((ch = getopt(argc, argv, "n:p:e:r:t:soh")) != -1) {
|
||||
switch(ch) {
|
||||
case 'n': im = atoi(optarg);
|
||||
if (log_2(im-2) == -1) {
|
||||
printerr("Grid must be ((power of 2)+2) in each dimension\n");
|
||||
exit(-1);
|
||||
}
|
||||
break;
|
||||
case 'p': nprocs = atoi(optarg);
|
||||
if (nprocs < 1) {
|
||||
printerr("P must be >= 1\n");
|
||||
exit(-1);
|
||||
}
|
||||
if (log_2(nprocs) == -1) {
|
||||
printerr("P must be a power of 2\n");
|
||||
exit(-1);
|
||||
}
|
||||
break;
|
||||
case 'e': tolerance = atof(optarg); break;
|
||||
case 'r': res = atof(optarg); break;
|
||||
case 't': dtau = atof(optarg); break;
|
||||
case 's': do_stats = !do_stats; break;
|
||||
case 'o': do_output = !do_output; break;
|
||||
case 'h': printf("Usage: OCEAN <options>\n\n");
|
||||
printf("options:\n");
|
||||
printf(" -nN : Simulate NxN ocean. N must be (power of 2)+2.\n");
|
||||
printf(" -pP : P = number of processors. P must be power of 2.\n");
|
||||
printf(" -eE : E = error tolerance for iterative relaxation.\n");
|
||||
printf(" -rR : R = distance between grid points in meters.\n");
|
||||
printf(" -tT : T = timestep in seconds.\n");
|
||||
printf(" -s : Print timing statistics.\n");
|
||||
printf(" -o : Print out relaxation residual values.\n");
|
||||
printf(" -h : Print out command line options.\n\n");
|
||||
printf("Default: OCEAN -n%1d -p%1d -e%1g -r%1g -t%1g\n",
|
||||
DEFAULT_N,DEFAULT_P,DEFAULT_E,DEFAULT_R,DEFAULT_T);
|
||||
exit(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
MAIN_INITENV(,60000000)
|
||||
|
||||
jm = im;
|
||||
printf("\n");
|
||||
printf("Ocean simulation with W-cycle multigrid solver\n");
|
||||
printf(" Processors : %1ld\n",nprocs);
|
||||
printf(" Grid size : %1ld x %1ld\n",im,jm);
|
||||
printf(" Grid resolution (meters) : %0.2f\n",res);
|
||||
printf(" Time between relaxations (seconds) : %0.0f\n",dtau);
|
||||
printf(" Error tolerance : %0.7g\n",tolerance);
|
||||
printf("\n");
|
||||
|
||||
xprocs = 0;
|
||||
yprocs = 0;
|
||||
procsqrt = sqrt((double) nprocs);
|
||||
j = (long) procsqrt;
|
||||
while ((xprocs == 0) && (j > 0)) {
|
||||
k = nprocs / j;
|
||||
if (k * j == nprocs) {
|
||||
if (k > j) {
|
||||
xprocs = j;
|
||||
yprocs = k;
|
||||
} else {
|
||||
xprocs = k;
|
||||
yprocs = j;
|
||||
}
|
||||
}
|
||||
j--;
|
||||
}
|
||||
if (xprocs == 0) {
|
||||
printerr("Could not find factors for subblocking\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
minlevel = 0;
|
||||
itemp = 1;
|
||||
jtemp = 1;
|
||||
numlev = 0;
|
||||
minlevel = 0;
|
||||
while (itemp < (im-2)) {
|
||||
itemp = itemp*2;
|
||||
jtemp = jtemp*2;
|
||||
if ((itemp/yprocs > 1) && (jtemp/xprocs > 1)) {
|
||||
numlev++;
|
||||
}
|
||||
}
|
||||
|
||||
if (numlev == 0) {
|
||||
printerr("Must have at least 2 grid points per processor in each dimension\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
imx = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
jmx = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
lev_res = (double *) G_MALLOC(numlev*sizeof(double));
|
||||
lev_tol = (double *) G_MALLOC(numlev*sizeof(double));
|
||||
i_int_coeff = (double *) G_MALLOC(numlev*sizeof(double));
|
||||
j_int_coeff = (double *) G_MALLOC(numlev*sizeof(double));
|
||||
xpts_per_proc = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
ypts_per_proc = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
|
||||
imx[numlev-1] = im;
|
||||
jmx[numlev-1] = jm;
|
||||
lev_res[numlev-1] = res;
|
||||
lev_tol[numlev-1] = tolerance;
|
||||
|
||||
for (i=numlev-2;i>=0;i--) {
|
||||
imx[i] = ((imx[i+1] - 2) / 2) + 2;
|
||||
jmx[i] = ((jmx[i+1] - 2) / 2) + 2;
|
||||
lev_res[i] = lev_res[i+1] * 2;
|
||||
}
|
||||
|
||||
for (i=0;i<numlev;i++) {
|
||||
xpts_per_proc[i] = (jmx[i]-2) / xprocs;
|
||||
ypts_per_proc[i] = (imx[i]-2) / yprocs;
|
||||
}
|
||||
for (i=numlev-1;i>=0;i--) {
|
||||
if ((xpts_per_proc[i] < 2) || (ypts_per_proc[i] < 2)) {
|
||||
minlevel = i+1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (i=0;i<numlev;i++) {
|
||||
temp += imx[i];
|
||||
}
|
||||
temp = 0;
|
||||
j = 0;
|
||||
for (k=0;k<numlev;k++) {
|
||||
for (i=0;i<imx[k];i++) {
|
||||
j++;
|
||||
temp += jmx[k];
|
||||
}
|
||||
}
|
||||
|
||||
d_size = nprocs*sizeof(double ***);
|
||||
psi = (double ****) G_MALLOC(d_size);
|
||||
psim = (double ****) G_MALLOC(d_size);
|
||||
work1 = (double ****) G_MALLOC(d_size);
|
||||
work4 = (double ****) G_MALLOC(d_size);
|
||||
work5 = (double ****) G_MALLOC(d_size);
|
||||
work7 = (double ****) G_MALLOC(d_size);
|
||||
temparray = (double ****) G_MALLOC(d_size);
|
||||
|
||||
d_size = 2*sizeof(double **);
|
||||
for (i=0;i<nprocs;i++) {
|
||||
psi[i] = (double ***) G_MALLOC(d_size);
|
||||
psim[i] = (double ***) G_MALLOC(d_size);
|
||||
work1[i] = (double ***) G_MALLOC(d_size);
|
||||
work4[i] = (double ***) G_MALLOC(d_size);
|
||||
work5[i] = (double ***) G_MALLOC(d_size);
|
||||
work7[i] = (double ***) G_MALLOC(d_size);
|
||||
temparray[i] = (double ***) G_MALLOC(d_size);
|
||||
}
|
||||
|
||||
d_size = nprocs*sizeof(double **);
|
||||
psium = (double ***) G_MALLOC(d_size);
|
||||
psilm = (double ***) G_MALLOC(d_size);
|
||||
psib = (double ***) G_MALLOC(d_size);
|
||||
ga = (double ***) G_MALLOC(d_size);
|
||||
gb = (double ***) G_MALLOC(d_size);
|
||||
work2 = (double ***) G_MALLOC(d_size);
|
||||
work3 = (double ***) G_MALLOC(d_size);
|
||||
work6 = (double ***) G_MALLOC(d_size);
|
||||
tauz = (double ***) G_MALLOC(d_size);
|
||||
oldga = (double ***) G_MALLOC(d_size);
|
||||
oldgb = (double ***) G_MALLOC(d_size);
|
||||
|
||||
gp = (struct Global_Private *) G_MALLOC((nprocs+1)*sizeof(struct Global_Private));
|
||||
for (i=0;i<nprocs;i++) {
|
||||
gp[i].rel_num_x = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
gp[i].rel_num_y = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
gp[i].eist = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
gp[i].ejst = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
gp[i].oist = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
gp[i].ojst = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
gp[i].rlist = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
gp[i].rljst = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
gp[i].rlien = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
gp[i].rljen = (long *) G_MALLOC(numlev*sizeof(long));
|
||||
gp[i].multi_time = 0;
|
||||
gp[i].total_time = 0;
|
||||
}
|
||||
|
||||
subblock();
|
||||
|
||||
x_part = (jm - 2)/xprocs + 2;
|
||||
y_part = (im - 2)/yprocs + 2;
|
||||
|
||||
d_size = x_part*y_part*sizeof(double) + y_part*sizeof(double *);
|
||||
|
||||
global = (struct global_struct *) G_MALLOC(sizeof(struct global_struct));
|
||||
for (i=0;i<nprocs;i++) {
|
||||
psi[i][0] = (double **) G_MALLOC(d_size);
|
||||
psi[i][1] = (double **) G_MALLOC(d_size);
|
||||
psim[i][0] = (double **) G_MALLOC(d_size);
|
||||
psim[i][1] = (double **) G_MALLOC(d_size);
|
||||
psium[i] = (double **) G_MALLOC(d_size);
|
||||
psilm[i] = (double **) G_MALLOC(d_size);
|
||||
psib[i] = (double **) G_MALLOC(d_size);
|
||||
ga[i] = (double **) G_MALLOC(d_size);
|
||||
gb[i] = (double **) G_MALLOC(d_size);
|
||||
work1[i][0] = (double **) G_MALLOC(d_size);
|
||||
work1[i][1] = (double **) G_MALLOC(d_size);
|
||||
work2[i] = (double **) G_MALLOC(d_size);
|
||||
work3[i] = (double **) G_MALLOC(d_size);
|
||||
work4[i][0] = (double **) G_MALLOC(d_size);
|
||||
work4[i][1] = (double **) G_MALLOC(d_size);
|
||||
work5[i][0] = (double **) G_MALLOC(d_size);
|
||||
work5[i][1] = (double **) G_MALLOC(d_size);
|
||||
work6[i] = (double **) G_MALLOC(d_size);
|
||||
work7[i][0] = (double **) G_MALLOC(d_size);
|
||||
work7[i][1] = (double **) G_MALLOC(d_size);
|
||||
temparray[i][0] = (double **) G_MALLOC(d_size);
|
||||
temparray[i][1] = (double **) G_MALLOC(d_size);
|
||||
tauz[i] = (double **) G_MALLOC(d_size);
|
||||
oldga[i] = (double **) G_MALLOC(d_size);
|
||||
oldgb[i] = (double **) G_MALLOC(d_size);
|
||||
}
|
||||
f = (double *) G_MALLOC(im*sizeof(double));
|
||||
|
||||
multi = (struct multi_struct *) G_MALLOC(sizeof(struct multi_struct));
|
||||
|
||||
d_size = numlev*sizeof(double **);
|
||||
if (numlev%2 == 1) { /* To make sure that the actual data
|
||||
starts double word aligned, add an extra
|
||||
pointer */
|
||||
d_size += sizeof(double **);
|
||||
}
|
||||
for (i=0;i<numlev;i++) {
|
||||
d_size += ((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
||||
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
||||
}
|
||||
|
||||
d_size *= nprocs;
|
||||
|
||||
if (nprocs%2 == 1) { /* To make sure that the actual data
|
||||
starts double word aligned, add an extra
|
||||
pointer */
|
||||
d_size += sizeof(double ***);
|
||||
}
|
||||
|
||||
d_size += nprocs*sizeof(double ***);
|
||||
q_multi = (double ****) G_MALLOC(d_size);
|
||||
rhs_multi = (double ****) G_MALLOC(d_size);
|
||||
|
||||
locks = (struct locks_struct *) G_MALLOC(sizeof(struct locks_struct));
|
||||
bars = (struct bars_struct *) G_MALLOC(sizeof(struct bars_struct));
|
||||
|
||||
LOCKINIT(locks->idlock)
|
||||
LOCKINIT(locks->psiailock)
|
||||
LOCKINIT(locks->psibilock)
|
||||
LOCKINIT(locks->donelock)
|
||||
LOCKINIT(locks->error_lock)
|
||||
LOCKINIT(locks->bar_lock)
|
||||
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARINIT(bars->iteration, nprocs)
|
||||
BARINIT(bars->gsudn, nprocs)
|
||||
BARINIT(bars->p_setup, nprocs)
|
||||
BARINIT(bars->p_redph, nprocs)
|
||||
BARINIT(bars->p_soln, nprocs)
|
||||
BARINIT(bars->p_subph, nprocs)
|
||||
BARINIT(bars->sl_prini, nprocs)
|
||||
BARINIT(bars->sl_psini, nprocs)
|
||||
BARINIT(bars->sl_onetime, nprocs)
|
||||
BARINIT(bars->sl_phase_1, nprocs)
|
||||
BARINIT(bars->sl_phase_2, nprocs)
|
||||
BARINIT(bars->sl_phase_3, nprocs)
|
||||
BARINIT(bars->sl_phase_4, nprocs)
|
||||
BARINIT(bars->sl_phase_5, nprocs)
|
||||
BARINIT(bars->sl_phase_6, nprocs)
|
||||
BARINIT(bars->sl_phase_7, nprocs)
|
||||
BARINIT(bars->sl_phase_8, nprocs)
|
||||
BARINIT(bars->sl_phase_9, nprocs)
|
||||
BARINIT(bars->sl_phase_10, nprocs)
|
||||
BARINIT(bars->error_barrier, nprocs)
|
||||
#else
|
||||
BARINIT(bars->barrier, nprocs)
|
||||
#endif
|
||||
|
||||
link_all();
|
||||
|
||||
multi->err_multi = 0.0;
|
||||
i_int_coeff[0] = 0.0;
|
||||
j_int_coeff[0] = 0.0;
|
||||
for (i=0;i<numlev;i++) {
|
||||
i_int_coeff[i] = 1.0/(imx[i]-1);
|
||||
j_int_coeff[i] = 1.0/(jmx[i]-1);
|
||||
}
|
||||
|
||||
/* initialize constants and variables
|
||||
|
||||
id is a global shared variable that has fetch-and-add operations
|
||||
performed on it by processes to obtain their pids. */
|
||||
|
||||
global->id = 0;
|
||||
global->psibi = 0.0;
|
||||
pi = atan(1.0);
|
||||
pi = 4.*pi;
|
||||
|
||||
factjacob = -1./(12.*res*res);
|
||||
factlap = 1./(res*res);
|
||||
eig2 = -h*f0*f0/(h1*h3*gpr);
|
||||
|
||||
jmm1 = jm-1 ;
|
||||
ysca = ((double) jmm1)*res ;
|
||||
|
||||
im = (imx[numlev-1]-2)/yprocs + 2;
|
||||
jm = (jmx[numlev-1]-2)/xprocs + 2;
|
||||
|
||||
if (do_output) {
|
||||
printf(" MULTIGRID OUTPUTS\n");
|
||||
}
|
||||
|
||||
CREATE(slave, nprocs);
|
||||
WAIT_FOR_END(nprocs);
|
||||
CLOCK(computeend)
|
||||
|
||||
printf("\n");
|
||||
printf(" PROCESS STATISTICS\n");
|
||||
printf(" Total Multigrid Multigrid\n");
|
||||
printf(" Proc Time Time Fraction\n");
|
||||
printf(" 0 %15.0f %15.0f %10.3f\n", gp[0].total_time,gp[0].multi_time, gp[0].multi_time/gp[0].total_time);
|
||||
|
||||
if (do_stats) {
|
||||
min_total = max_total = avg_total = gp[0].total_time;
|
||||
min_multi = max_multi = avg_multi = gp[0].multi_time;
|
||||
min_frac = max_frac = avg_frac = gp[0].multi_time/gp[0].total_time;
|
||||
for (i=1;i<nprocs;i++) {
|
||||
if (gp[i].total_time > max_total) {
|
||||
max_total = gp[i].total_time;
|
||||
}
|
||||
if (gp[i].total_time < min_total) {
|
||||
min_total = gp[i].total_time;
|
||||
}
|
||||
if (gp[i].multi_time > max_multi) {
|
||||
max_multi = gp[i].multi_time;
|
||||
}
|
||||
if (gp[i].multi_time < min_multi) {
|
||||
min_multi = gp[i].multi_time;
|
||||
}
|
||||
if (gp[i].multi_time/gp[i].total_time > max_frac) {
|
||||
max_frac = gp[i].multi_time/gp[i].total_time;
|
||||
}
|
||||
if (gp[i].multi_time/gp[i].total_time < min_frac) {
|
||||
min_frac = gp[i].multi_time/gp[i].total_time;
|
||||
}
|
||||
avg_total += gp[i].total_time;
|
||||
avg_multi += gp[i].multi_time;
|
||||
avg_frac += gp[i].multi_time/gp[i].total_time;
|
||||
}
|
||||
avg_total = avg_total / nprocs;
|
||||
avg_multi = avg_multi / nprocs;
|
||||
avg_frac = avg_frac / nprocs;
|
||||
for (i=1;i<nprocs;i++) {
|
||||
printf(" %3ld %15.0f %15.0f %10.3f\n", i,gp[i].total_time,gp[i].multi_time, gp[i].multi_time/gp[i].total_time);
|
||||
}
|
||||
printf(" Avg %15.0f %15.0f %10.3f\n", avg_total,avg_multi,avg_frac);
|
||||
printf(" Min %15.0f %15.0f %10.3f\n", min_total,min_multi,min_frac);
|
||||
printf(" Max %15.0f %15.0f %10.3f\n", max_total,max_multi,max_frac);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
global->starttime = start;
|
||||
printf(" TIMING INFORMATION\n");
|
||||
printf("Start time : %16lu\n", global->starttime);
|
||||
printf("Initialization finish time : %16lu\n", global->trackstart);
|
||||
printf("Overall finish time : %16lu\n", computeend);
|
||||
printf("Total time with initialization : %16lu\n", computeend-global->starttime);
|
||||
printf("Total time without initialization : %16lu\n", computeend-global->trackstart);
|
||||
printf(" (excludes first timestep)\n");
|
||||
printf("\n");
|
||||
|
||||
MAIN_END
|
||||
}
|
||||
|
||||
/*
 * Integer base-2 logarithm for exact powers of two.
 *
 * Returns e when number equals 2 to the power e (for e from 0 to 50),
 * and -1 for every other value, including zero and negative numbers.
 * The doubling stops after 50 steps, so larger powers of two also
 * yield -1.
 */
long log_2(long number)
{
  long cumulative = 1;
  long out = 0;

  /* Double until the running power reaches or passes number. */
  while ((cumulative < number) && (out < 50)) {
    cumulative = cumulative * 2;
    out ++;
  }

  if (cumulative == number) {
    return(out);
  } else {
    return(-1);
  }
}
|
||||
|
||||
/* Write the message s to stderr with the prefix "ERROR: " and a */
/* trailing newline. The callers in main pass messages that already */
/* end in a newline, so their output is followed by an empty line. */
void printerr(char *s)
|
||||
{
|
||||
fprintf(stderr,"ERROR: %s\n",s);
|
||||
}
|
||||
|
816
splash2/codes/apps/ocean/contiguous_partitions/multi.C
Normal file
816
splash2/codes/apps/ocean/contiguous_partitions/multi.C
Normal file
|
@ -0,0 +1,816 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/* Shared memory implementation of the multigrid method
|
||||
Implementation uses red-black gauss-seidel relaxation
|
||||
iterations, w cycles, and the method of half-injection for
|
||||
residual computation. */
|
||||
|
||||
EXTERN_ENV
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include "decs.h"
|
||||
|
||||
/* perform multigrid (w cycles) */
|
||||
void multig(long my_id)
|
||||
{
|
||||
long iter;
|
||||
double wu;
|
||||
double errp;
|
||||
long m;
|
||||
long flag1;
|
||||
long flag2;
|
||||
long k;
|
||||
long my_num;
|
||||
double wmax;
|
||||
double local_err;
|
||||
double red_local_err;
|
||||
double black_local_err;
|
||||
double g_error;
|
||||
|
||||
flag1 = 0;
|
||||
flag2 = 0;
|
||||
iter = 0;
|
||||
m = numlev-1;
|
||||
wmax = maxwork;
|
||||
my_num = my_id;
|
||||
wu = 0.0;
|
||||
|
||||
k = m;
|
||||
g_error = 1.0e30;
|
||||
while ((!flag1) && (!flag2)) {
|
||||
errp = g_error;
|
||||
iter++;
|
||||
if (my_num == MASTER) {
|
||||
multi->err_multi = 0.0;
|
||||
}
|
||||
|
||||
/* barrier to make sure all procs have finished intadd or rescal */
|
||||
/* before proceeding with relaxation */
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->error_barrier,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
copy_black(k,my_num);
|
||||
|
||||
relax(k,&red_local_err,RED_ITER,my_num);
|
||||
|
||||
/* barrier to make sure all red computations have been performed */
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->error_barrier,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
copy_red(k,my_num);
|
||||
|
||||
relax(k,&black_local_err,BLACK_ITER,my_num);
|
||||
|
||||
/* compute max local error from red_local_err and black_local_err */
|
||||
|
||||
if (red_local_err > black_local_err) {
|
||||
local_err = red_local_err;
|
||||
} else {
|
||||
local_err = black_local_err;
|
||||
}
|
||||
|
||||
/* update the global error if necessary */
|
||||
|
||||
LOCK(locks->error_lock)
|
||||
if (local_err > multi->err_multi) {
|
||||
multi->err_multi = local_err;
|
||||
}
|
||||
UNLOCK(locks->error_lock)
|
||||
|
||||
/* a single relaxation sweep at the finest level is one unit of */
|
||||
/* work */
|
||||
|
||||
wu+=pow((double)4.0,(double)k-m);
|
||||
|
||||
/* barrier to make sure all processors have checked local error */
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->error_barrier,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
g_error = multi->err_multi;
|
||||
|
||||
/* barrier to make sure master does not cycle back to top of loop */
|
||||
/* and reset global->err before we read it and decide what to do */
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->error_barrier,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
|
||||
if (g_error >= lev_tol[k]) {
|
||||
if (wu > wmax) {
|
||||
/* max work exceeded */
|
||||
flag1 = 1;
|
||||
fprintf(stderr,"ERROR: Maximum work limit %0.5f exceeded\n",wmax);
|
||||
exit(-1);
|
||||
} else {
|
||||
/* if we have not converged */
|
||||
if ((k != 0) && (g_error/errp >= 0.6) &&
|
||||
(k > minlevel)) {
|
||||
/* if need to go to coarser grid */
|
||||
|
||||
copy_borders(k,my_num);
|
||||
copy_rhs_borders(k,my_num);
|
||||
|
||||
/* This bar is needed because the routine rescal uses the neighbor's
|
||||
border points to compute s4. We must ensure that the neighbor's
|
||||
border points have been written before we try computing the new
|
||||
rescal values */
|
||||
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->error_barrier,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
|
||||
rescal(k,my_num);
|
||||
|
||||
/* transfer residual to rhs of coarser grid */
|
||||
lev_tol[k-1] = 0.3 * g_error;
|
||||
k = k-1;
|
||||
putz(k,my_num);
|
||||
/* make initial guess on coarser grid zero */
|
||||
g_error = 1.0e30;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* if we have converged at this level */
|
||||
if (k == m) {
|
||||
/* if finest grid, we are done */
|
||||
flag2 = 1;
|
||||
} else {
|
||||
/* else go to next finest grid */
|
||||
|
||||
copy_borders(k,my_num);
|
||||
|
||||
intadd(k,my_num);
|
||||
/* changes the grid values at the finer level. rhs at finer level */
|
||||
/* remains what it already is */
|
||||
k++;
|
||||
g_error = 1.0e30;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (do_output) {
|
||||
if (my_num == MASTER) {
|
||||
printf("iter %ld, level %ld, residual norm %12.8e, work = %7.3f\n", iter,k,multi->err_multi,wu);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* perform red or black iteration (not both) */
|
||||
void relax(long k, double *err, long color, long my_num)
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long iend;
|
||||
long jend;
|
||||
long oddistart;
|
||||
long oddjstart;
|
||||
long evenistart;
|
||||
long evenjstart;
|
||||
double a;
|
||||
double h;
|
||||
double factor;
|
||||
double maxerr;
|
||||
double newerr;
|
||||
double oldval;
|
||||
double newval;
|
||||
double **t2a;
|
||||
double **t2b;
|
||||
double *t1a;
|
||||
double *t1b;
|
||||
double *t1c;
|
||||
double *t1d;
|
||||
|
||||
i = 0;
|
||||
j = 0;
|
||||
|
||||
*err = 0.0;
|
||||
h = lev_res[k];
|
||||
|
||||
/* points whose sum of row and col index is even do a red iteration, */
|
||||
/* others do a black */
|
||||
|
||||
evenistart = gp[my_num].eist[k];
|
||||
evenjstart = gp[my_num].ejst[k];
|
||||
oddistart = gp[my_num].oist[k];
|
||||
oddjstart = gp[my_num].ojst[k];
|
||||
|
||||
iend = gp[my_num].rlien[k];
|
||||
jend = gp[my_num].rljen[k];
|
||||
|
||||
factor = 4.0 - eig2 * h * h ;
|
||||
maxerr = 0.0;
|
||||
t2a = (double **) q_multi[my_num][k];
|
||||
t2b = (double **) rhs_multi[my_num][k];
|
||||
if (color == RED_ITER) {
|
||||
for (i=evenistart;i<iend;i+=2) {
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
t1c = (double *) t2a[i-1];
|
||||
t1d = (double *) t2a[i+1];
|
||||
for (j=evenjstart;j<jend;j+=2) {
|
||||
a = t1a[j+1] + t1a[j-1] +
|
||||
t1c[j] + t1d[j] -
|
||||
t1b[j] ;
|
||||
oldval = t1a[j];
|
||||
newval = a / factor;
|
||||
newerr = oldval - newval;
|
||||
t1a[j] = newval;
|
||||
if (fabs(newerr) > maxerr) {
|
||||
maxerr = fabs(newerr);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i=oddistart;i<iend;i+=2) {
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
t1c = (double *) t2a[i-1];
|
||||
t1d = (double *) t2a[i+1];
|
||||
for (j=oddjstart;j<jend;j+=2) {
|
||||
a = t1a[j+1] + t1a[j-1] +
|
||||
t1c[j] + t1d[j] -
|
||||
t1b[j] ;
|
||||
oldval = t1a[j];
|
||||
newval = a / factor;
|
||||
newerr = oldval - newval;
|
||||
t1a[j] = newval;
|
||||
if (fabs(newerr) > maxerr) {
|
||||
maxerr = fabs(newerr);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (color == BLACK_ITER) {
|
||||
for (i=evenistart;i<iend;i+=2) {
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
t1c = (double *) t2a[i-1];
|
||||
t1d = (double *) t2a[i+1];
|
||||
for (j=oddjstart;j<jend;j+=2) {
|
||||
a = t1a[j+1] + t1a[j-1] +
|
||||
t1c[j] + t1d[j] -
|
||||
t1b[j] ;
|
||||
oldval = t1a[j];
|
||||
newval = a / factor;
|
||||
newerr = oldval - newval;
|
||||
t1a[j] = newval;
|
||||
if (fabs(newerr) > maxerr) {
|
||||
maxerr = fabs(newerr);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i=oddistart;i<iend;i+=2) {
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
t1c = (double *) t2a[i-1];
|
||||
t1d = (double *) t2a[i+1];
|
||||
for (j=evenjstart;j<jend;j+=2) {
|
||||
a = t1a[j+1] + t1a[j-1] +
|
||||
t1c[j] + t1d[j] -
|
||||
t1b[j] ;
|
||||
oldval = t1a[j];
|
||||
newval = a / factor;
|
||||
newerr = oldval - newval;
|
||||
t1a[j] = newval;
|
||||
if (fabs(newerr) > maxerr) {
|
||||
maxerr = fabs(newerr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*err = maxerr;
|
||||
}
|
||||
|
||||
/* perform half-injection to next coarsest level */
|
||||
void rescal(long kf, long my_num)
|
||||
{
|
||||
long ic;
|
||||
long if17;
|
||||
long jf;
|
||||
long jc;
|
||||
long krc;
|
||||
long istart;
|
||||
long iend;
|
||||
long jstart;
|
||||
long jend;
|
||||
double hf;
|
||||
double hc;
|
||||
double s;
|
||||
double s1;
|
||||
double s2;
|
||||
double s3;
|
||||
double s4;
|
||||
double factor;
|
||||
double int1;
|
||||
double int2;
|
||||
double i_int_factor;
|
||||
double j_int_factor;
|
||||
double int_val;
|
||||
long i_off;
|
||||
long j_off;
|
||||
long up_proc;
|
||||
long left_proc;
|
||||
long im;
|
||||
long jm;
|
||||
double temp;
|
||||
double temp2;
|
||||
double **t2a;
|
||||
double **t2b;
|
||||
double **t2c;
|
||||
double *t1a;
|
||||
double *t1b;
|
||||
double *t1c;
|
||||
double *t1d;
|
||||
double *t1e;
|
||||
double *t1f;
|
||||
double *t1g;
|
||||
double *t1h;
|
||||
|
||||
krc = kf - 1;
|
||||
hc = lev_res[krc];
|
||||
hf = lev_res[kf];
|
||||
i_off = gp[my_num].rownum*ypts_per_proc[krc];
|
||||
j_off = gp[my_num].colnum*xpts_per_proc[krc];
|
||||
up_proc = gp[my_num].neighbors[UP];
|
||||
left_proc = gp[my_num].neighbors[LEFT];
|
||||
im = (imx[kf]-2)/yprocs;
|
||||
jm = (jmx[kf]-2)/xprocs;
|
||||
|
||||
istart = gp[my_num].rlist[krc];
|
||||
jstart = gp[my_num].rljst[krc];
|
||||
iend = gp[my_num].rlien[krc] - 1;
|
||||
jend = gp[my_num].rljen[krc] - 1;
|
||||
|
||||
factor = 4.0 - eig2 * hf * hf;
|
||||
|
||||
t2a = (double **) q_multi[my_num][kf];
|
||||
t2b = (double **) rhs_multi[my_num][kf];
|
||||
t2c = (double **) rhs_multi[my_num][krc];
|
||||
if17=2*(istart-1);
|
||||
for(ic=istart;ic<=iend;ic++) {
|
||||
if17+=2;
|
||||
i_int_factor = (ic+i_off) * i_int_coeff[krc] * 0.5;
|
||||
jf = 2 * (jstart - 1);
|
||||
t1a = (double *) t2a[if17];
|
||||
t1b = (double *) t2b[if17];
|
||||
t1c = (double *) t2c[ic];
|
||||
t1d = (double *) t2a[if17-1];
|
||||
t1e = (double *) t2a[if17+1];
|
||||
t1f = (double *) t2a[if17-2];
|
||||
t1g = (double *) t2a[if17-3];
|
||||
t1h = (double *) t2b[if17-2];
|
||||
for(jc=jstart;jc<=jend;jc++) {
|
||||
jf+=2;
|
||||
j_int_factor = (jc+j_off)*j_int_coeff[krc] * 0.5;
|
||||
|
||||
/* method of half-injection uses 2.0 instead of 4.0 */
|
||||
|
||||
/* do bilinear interpolation */
|
||||
s = t1a[jf+1] + t1a[jf-1] + t1d[jf] + t1e[jf];
|
||||
s1 = 2.0 * (t1b[jf] - s + factor * t1a[jf]);
|
||||
if (((if17 == 2) && (gp[my_num].neighbors[UP] == -1)) ||
|
||||
((jf == 2) && (gp[my_num].neighbors[LEFT] == -1))) {
|
||||
s2 = 0;
|
||||
s3 = 0;
|
||||
s4 = 0;
|
||||
} else if ((if17 == 2) || (jf == 2)) {
|
||||
if (jf == 2) {
|
||||
temp = q_multi[left_proc][kf][if17][jm-1];
|
||||
} else {
|
||||
temp = t1a[jf-3];
|
||||
}
|
||||
s = t1a[jf-1] + temp + t1d[jf-2] + t1e[jf-2];
|
||||
s2 = 2.0 * (t1b[jf-2] - s + factor * t1a[jf-2]);
|
||||
if (if17 == 2) {
|
||||
temp = q_multi[up_proc][kf][im-1][jf];
|
||||
} else {
|
||||
temp = t1g[jf];
|
||||
}
|
||||
s = t1f[jf+1]+ t1f[jf-1]+ temp + t1d[jf];
|
||||
s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]);
|
||||
if (jf == 2) {
|
||||
temp = q_multi[left_proc][kf][if17-2][jm-1];
|
||||
} else {
|
||||
temp = t1f[jf-3];
|
||||
}
|
||||
if (if17 == 2) {
|
||||
temp2 = q_multi[up_proc][kf][im-1][jf-2];
|
||||
} else {
|
||||
temp2 = t1g[jf-2];
|
||||
}
|
||||
s = t1f[jf-1]+ temp + temp2 + t1d[jf-2];
|
||||
s4 = 2.0 * (t1h[jf-2] - s + factor * t1f[jf-2]);
|
||||
} else {
|
||||
s = t1a[jf-1] + t1a[jf-3] + t1d[jf-2] + t1e[jf-2];
|
||||
s2 = 2.0 * (t1b[jf-2] - s + factor * t1a[jf-2]);
|
||||
s = t1f[jf+1]+ t1f[jf-1]+ t1g[jf] + t1d[jf];
|
||||
s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]);
|
||||
s = t1f[jf-1]+ t1f[jf-3]+ t1g[jf-2]+ t1d[jf-2];
|
||||
s4 = 2.0 * (t1h[jf-2] - s + factor * t1f[jf-2]);
|
||||
}
|
||||
int1 = j_int_factor*s4 + (1.0-j_int_factor)*s3;
|
||||
int2 = j_int_factor*s2 + (1.0-j_int_factor)*s1;
|
||||
int_val = i_int_factor*int1+(1.0-i_int_factor)*int2;
|
||||
t1c[jc] = i_int_factor*int1+(1.0-i_int_factor)*int2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* perform interpolation and addition to next finest grid */
|
||||
void intadd(long kc, long my_num)
|
||||
{
|
||||
long ic;
|
||||
long if17;
|
||||
long jf;
|
||||
long jc;
|
||||
long kf;
|
||||
long istart;
|
||||
long jstart;
|
||||
long iend;
|
||||
long jend;
|
||||
double hc;
|
||||
double hf;
|
||||
double int1;
|
||||
double int2;
|
||||
double i_int_factor1;
|
||||
double j_int_factor1;
|
||||
double i_int_factor2;
|
||||
double j_int_factor2;
|
||||
long i_off;
|
||||
long j_off;
|
||||
double **t2a;
|
||||
double **t2b;
|
||||
double *t1a;
|
||||
double *t1b;
|
||||
double *t1c;
|
||||
double *t1d;
|
||||
double *t1e;
|
||||
|
||||
kf = kc + 1;
|
||||
hc = lev_res[kc];
|
||||
hf = lev_res[kf];
|
||||
|
||||
istart = gp[my_num].rlist[kc];
|
||||
jstart = gp[my_num].rljst[kc];
|
||||
iend = gp[my_num].rlien[kc] - 1;
|
||||
jend = gp[my_num].rljen[kc] - 1;
|
||||
i_off = gp[my_num].rownum*ypts_per_proc[kc];
|
||||
j_off = gp[my_num].colnum*xpts_per_proc[kc];
|
||||
|
||||
t2a = (double **) q_multi[my_num][kc];
|
||||
t2b = (double **) q_multi[my_num][kf];
|
||||
if17 = 2*(istart-1);
|
||||
for(ic=istart;ic<=iend;ic++) {
|
||||
if17+=2;
|
||||
i_int_factor1= ((imx[kc]-2)-(ic+i_off-1)) * (i_int_coeff[kf]);
|
||||
i_int_factor2= (ic+i_off) * i_int_coeff[kf];
|
||||
jf = 2*(jstart-1);
|
||||
|
||||
t1a = (double *) t2a[ic];
|
||||
t1b = (double *) t2a[ic-1];
|
||||
t1c = (double *) t2a[ic+1];
|
||||
t1d = (double *) t2b[if17];
|
||||
t1e = (double *) t2b[if17-1];
|
||||
for(jc=jstart;jc<=jend;jc++) {
|
||||
jf+=2;
|
||||
j_int_factor1= ((jmx[kc]-2)-(jc+j_off-1)) * (j_int_coeff[kf]);
|
||||
j_int_factor2= (jc+j_off) * j_int_coeff[kf];
|
||||
|
||||
int1 = j_int_factor1*t1a[jc-1] + (1.0-j_int_factor1)*t1a[jc];
|
||||
int2 = j_int_factor1*t1b[jc-1] + (1.0-j_int_factor1)*t1b[jc];
|
||||
t1e[jf-1] += i_int_factor1*int2 + (1.0-i_int_factor1)*int1;
|
||||
int2 = j_int_factor1*t1c[jc-1] + (1.0-j_int_factor1)*t1c[jc];
|
||||
t1d[jf-1] += i_int_factor2*int2 + (1.0-i_int_factor2)*int1;
|
||||
int1 = j_int_factor2*t1a[jc+1] + (1.0-j_int_factor2)*t1a[jc];
|
||||
int2 = j_int_factor2*t1b[jc+1] + (1.0-j_int_factor2)*t1b[jc];
|
||||
t1e[jf] += i_int_factor1*int2 + (1.0-i_int_factor1)*int1;
|
||||
int2 = j_int_factor2*t1c[jc+1] + (1.0-j_int_factor2)*t1c[jc];
|
||||
t1d[jf] += i_int_factor2*int2 + (1.0-i_int_factor2)*int1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* initialize a grid to zero in parallel */
|
||||
void putz(long k, long my_num)
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long istart;
|
||||
long jstart;
|
||||
long iend;
|
||||
long jend;
|
||||
double **t2a;
|
||||
double *t1a;
|
||||
|
||||
istart = gp[my_num].rlist[k];
|
||||
jstart = gp[my_num].rljst[k];
|
||||
iend = gp[my_num].rlien[k];
|
||||
jend = gp[my_num].rljen[k];
|
||||
|
||||
t2a = (double **) q_multi[my_num][k];
|
||||
for (i=istart;i<=iend;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
for (j=jstart;j<=jend;j++) {
|
||||
t1a[j] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void copy_borders(long k, long pid)
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long jj;
|
||||
long im;
|
||||
long jm;
|
||||
long lastrow;
|
||||
long lastcol;
|
||||
double **t2a;
|
||||
double **t2b;
|
||||
double *t1a;
|
||||
double *t1b;
|
||||
|
||||
im = (imx[k]-2)/yprocs + 2;
|
||||
jm = (jmx[k]-2)/xprocs + 2;
|
||||
lastrow = (imx[k]-2)/yprocs;
|
||||
lastcol = (jmx[k]-2)/xprocs;
|
||||
|
||||
t2a = (double **) q_multi[pid][k];
|
||||
jj = gp[pid].neighbors[UPLEFT];
|
||||
if (jj != -1) {
|
||||
t2a[0][0]=q_multi[jj][k][im-2][jm-2];
|
||||
}
|
||||
jj = gp[pid].neighbors[UPRIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1]=q_multi[jj][k][im-2][1];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWNLEFT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0]=q_multi[jj][k][1][jm-2];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWNRIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1]=q_multi[jj][k][1][1];
|
||||
}
|
||||
|
||||
if (gp[pid].neighbors[UP] == -1) {
|
||||
jj = gp[pid].neighbors[LEFT];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = q_multi[jj][k][0][jm-2];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = q_multi[jj][k][1][0];
|
||||
}
|
||||
}
|
||||
jj = gp[pid].neighbors[RIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = q_multi[jj][k][0][1];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = q_multi[jj][k][1][jm-1];
|
||||
}
|
||||
}
|
||||
} else if (gp[pid].neighbors[DOWN] == -1) {
|
||||
jj = gp[pid].neighbors[LEFT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = q_multi[jj][k][im-1][jm-2];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = q_multi[jj][k][im-2][0];
|
||||
}
|
||||
}
|
||||
jj = gp[pid].neighbors[RIGHT];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = q_multi[jj][k][im-1][1];
|
||||
} else {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = q_multi[jj][k][im-2][jm-1];
|
||||
}
|
||||
}
|
||||
} else if (gp[pid].neighbors[LEFT] == -1) {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][0] = q_multi[jj][k][im-2][0];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][0] = q_multi[jj][k][1][0];
|
||||
}
|
||||
} else if (gp[pid].neighbors[RIGHT] == -1) {
|
||||
jj = gp[pid].neighbors[UP];
|
||||
if (jj != -1) {
|
||||
t2a[0][jm-1] = q_multi[jj][k][im-2][jm-1];
|
||||
}
|
||||
jj = gp[pid].neighbors[DOWN];
|
||||
if (jj != -1) {
|
||||
t2a[im-1][jm-1] = q_multi[jj][k][1][jm-1];
|
||||
}
|
||||
}
|
||||
|
||||
j = gp[pid].neighbors[UP];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) q_multi[j][k][im-2];
|
||||
for (i=1;i<=lastcol;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[DOWN];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
t1b = (double *) q_multi[j][k][1];
|
||||
for (i=1;i<=lastcol;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[LEFT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) q_multi[j][k];
|
||||
for (i=1;i<=lastrow;i++) {
|
||||
t2a[i][0] = t2b[i][jm-2];
|
||||
}
|
||||
}
|
||||
j = gp[pid].neighbors[RIGHT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) q_multi[j][k];
|
||||
for (i=1;i<=lastrow;i++) {
|
||||
t2a[i][jm-1] = t2b[i][1];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void copy_rhs_borders(long k, long procid)
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long im;
|
||||
long jm;
|
||||
long lastrow;
|
||||
long lastcol;
|
||||
double **t2a;
|
||||
double **t2b;
|
||||
double *t1a;
|
||||
double *t1b;
|
||||
|
||||
im = (imx[k]-2)/yprocs+2;
|
||||
jm = (jmx[k]-2)/xprocs+2;
|
||||
lastrow = (imx[k]-2)/yprocs;
|
||||
lastcol = (jmx[k]-2)/xprocs;
|
||||
|
||||
t2a = (double **) rhs_multi[procid][k];
|
||||
if (gp[procid].neighbors[UPLEFT] != -1) {
|
||||
j = gp[procid].neighbors[UPLEFT];
|
||||
t2a[0][0] = rhs_multi[j][k][im-2][jm-2];
|
||||
}
|
||||
|
||||
if (gp[procid].neighbors[UP] != -1) {
|
||||
j = gp[procid].neighbors[UP];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) rhs_multi[j][k][im-2];
|
||||
for (i=2;i<=lastcol;i+=2) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] != -1) {
|
||||
j = gp[procid].neighbors[LEFT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) rhs_multi[j][k];
|
||||
for (i=2;i<=lastrow;i+=2) {
|
||||
t2a[i][0] = t2b[i][jm-2];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void copy_red(long k, long procid)
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long im;
|
||||
long jm;
|
||||
long lastrow;
|
||||
long lastcol;
|
||||
double **t2a;
|
||||
double **t2b;
|
||||
double *t1a;
|
||||
double *t1b;
|
||||
|
||||
im = (imx[k]-2)/yprocs+2;
|
||||
jm = (jmx[k]-2)/xprocs+2;
|
||||
lastrow = (imx[k]-2)/yprocs;
|
||||
lastcol = (jmx[k]-2)/xprocs;
|
||||
|
||||
t2a = (double **) q_multi[procid][k];
|
||||
j = gp[procid].neighbors[UP];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) q_multi[j][k][im-2];
|
||||
for (i=2;i<=lastcol;i+=2) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[procid].neighbors[DOWN];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
t1b = (double *) q_multi[j][k][1];
|
||||
for (i=1;i<=lastcol;i+=2) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[procid].neighbors[LEFT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) q_multi[j][k];
|
||||
for (i=2;i<=lastrow;i+=2) {
|
||||
t2a[i][0] = t2b[i][jm-2];
|
||||
}
|
||||
}
|
||||
j = gp[procid].neighbors[RIGHT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) q_multi[j][k];
|
||||
for (i=1;i<=lastrow;i+=2) {
|
||||
t2a[i][jm-1] = t2b[i][1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void copy_black(long k, long procid)
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long im;
|
||||
long jm;
|
||||
long lastrow;
|
||||
long lastcol;
|
||||
double **t2a;
|
||||
double **t2b;
|
||||
double *t1a;
|
||||
double *t1b;
|
||||
|
||||
im = (imx[k]-2)/yprocs+2;
|
||||
jm = (jmx[k]-2)/xprocs+2;
|
||||
lastrow = (imx[k]-2)/yprocs;
|
||||
lastcol = (jmx[k]-2)/xprocs;
|
||||
|
||||
t2a = (double **) q_multi[procid][k];
|
||||
j = gp[procid].neighbors[UP];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) q_multi[j][k][im-2];
|
||||
for (i=1;i<=lastcol;i+=2) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[procid].neighbors[DOWN];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
t1b = (double *) q_multi[j][k][1];
|
||||
for (i=2;i<=lastcol;i+=2) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[procid].neighbors[LEFT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) q_multi[j][k];
|
||||
for (i=1;i<=lastrow;i+=2) {
|
||||
t2a[i][0] = t2b[i][jm-2];
|
||||
}
|
||||
}
|
||||
j = gp[procid].neighbors[RIGHT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) q_multi[j][k];
|
||||
for (i=2;i<=lastrow;i+=2) {
|
||||
t2a[i][jm-1] = t2b[i][1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
830
splash2/codes/apps/ocean/contiguous_partitions/slave1.C
Normal file
830
splash2/codes/apps/ocean/contiguous_partitions/slave1.C
Normal file
|
@ -0,0 +1,830 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/* ****************
|
||||
subroutine slave
|
||||
**************** */
|
||||
|
||||
EXTERN_ENV
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include "decs.h"
|
||||
|
||||
void slave()
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long nstep;
|
||||
long iindex;
|
||||
long iday;
|
||||
double ysca1;
|
||||
double y;
|
||||
double factor;
|
||||
double sintemp;
|
||||
double curlt;
|
||||
double ressqr;
|
||||
long istart;
|
||||
long iend;
|
||||
long jstart;
|
||||
long jend;
|
||||
long ist;
|
||||
long ien;
|
||||
long jst;
|
||||
long jen;
|
||||
double fac;
|
||||
long dayflag=0;
|
||||
long dhourflag=0;
|
||||
long endflag=0;
|
||||
long firstrow;
|
||||
long lastrow;
|
||||
long numrows;
|
||||
long firstcol;
|
||||
long lastcol;
|
||||
long numcols;
|
||||
long psiindex;
|
||||
double psibipriv;
|
||||
double ttime;
|
||||
double dhour;
|
||||
double day;
|
||||
long procid;
|
||||
long psinum;
|
||||
long j_off = 0;
|
||||
unsigned long t1;
|
||||
double **t2a;
|
||||
double **t2b;
|
||||
double *t1a;
|
||||
double *t1b;
|
||||
double *t1c;
|
||||
double *t1d;
|
||||
|
||||
ressqr = lev_res[numlev-1] * lev_res[numlev-1];
|
||||
|
||||
LOCK(locks->idlock)
|
||||
procid = global->id;
|
||||
global->id = global->id+1;
|
||||
UNLOCK(locks->idlock)
|
||||
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->sl_prini,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
|
||||
processors to avoid migration. */
|
||||
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might distribute
|
||||
data structures across physically distributed memories as
|
||||
desired.
|
||||
|
||||
One way to do this is as follows. The function allocate(START,SIZE,I)
|
||||
is assumed to place all addresses x such that
|
||||
(START <= x < START+SIZE) on node I.
|
||||
|
||||
long d_size;
|
||||
unsigned long g_size;
|
||||
unsigned long mg_size;
|
||||
|
||||
if (procid == MASTER) {
|
||||
g_size = ((jmx[numlev-1]-2)/xprocs+2)*((imx[numlev-1]-2)/yprocs+2)*siz
|
||||
eof(double) +
|
||||
((imx[numlev-1]-2)/yprocs+2)*sizeof(double *);
|
||||
|
||||
mg_size = numlev*sizeof(double **);
|
||||
for (i=0;i<numlev;i++) {
|
||||
mg_size+=((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
||||
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
||||
}
|
||||
for (i= 0;i<nprocs;i++) {
|
||||
d_size = 2*sizeof(double **);
|
||||
allocate((unsigned long) psi[i],d_size,i);
|
||||
allocate((unsigned long) psim[i],d_size,i);
|
||||
allocate((unsigned long) work1[i],d_size,i);
|
||||
allocate((unsigned long) work4[i],d_size,i);
|
||||
allocate((unsigned long) work5[i],d_size,i);
|
||||
allocate((unsigned long) work7[i],d_size,i);
|
||||
allocate((unsigned long) temparray[i],d_size,i);
|
||||
allocate((unsigned long) psi[i][0],g_size,i);
|
||||
allocate((unsigned long) psi[i][1],g_size,i);
|
||||
allocate((unsigned long) psim[i][0],g_size,i);
|
||||
allocate((unsigned long) psim[i][1],g_size,i);
|
||||
allocate((unsigned long) psium[i],g_size,i);
|
||||
allocate((unsigned long) psilm[i],g_size,i);
|
||||
allocate((unsigned long) psib[i],g_size,i);
|
||||
allocate((unsigned long) ga[i],g_size,i);
|
||||
allocate((unsigned long) gb[i],g_size,i);
|
||||
allocate((unsigned long) work1[i][0],g_size,i);
|
||||
allocate((unsigned long) work1[i][1],g_size,i);
|
||||
allocate((unsigned long) work2[i],g_size,i);
|
||||
allocate((unsigned long) work3[i],g_size,i);
|
||||
allocate((unsigned long) work4[i][0],g_size,i);
|
||||
allocate((unsigned long) work4[i][1],g_size,i);
|
||||
allocate((unsigned long) work5[i][0],g_size,i);
|
||||
allocate((unsigned long) work5[i][1],g_size,i);
|
||||
allocate((unsigned long) work6[i],g_size,i);
|
||||
allocate((unsigned long) work7[i][0],g_size,i);
|
||||
allocate((unsigned long) work7[i][1],g_size,i);
|
||||
allocate((unsigned long) temparray[i][0],g_size,i);
|
||||
allocate((unsigned long) temparray[i][1],g_size,i);
|
||||
allocate((unsigned long) tauz[i],g_size,i);
|
||||
allocate((unsigned long) oldga[i],g_size,i);
|
||||
allocate((unsigned long) oldgb[i],g_size,i);
|
||||
d_size = numlev * sizeof(long);
|
||||
allocate((unsigned long) gp[i].rel_num_x,d_size,i);
|
||||
allocate((unsigned long) gp[i].rel_num_y,d_size,i);
|
||||
allocate((unsigned long) gp[i].eist,d_size,i);
|
||||
allocate((unsigned long) gp[i].ejst,d_size,i);
|
||||
allocate((unsigned long) gp[i].oist,d_size,i);
|
||||
allocate((unsigned long) gp[i].ojst,d_size,i);
|
||||
allocate((unsigned long) gp[i].rlist,d_size,i);
|
||||
allocate((unsigned long) gp[i].rljst,d_size,i);
|
||||
allocate((unsigned long) gp[i].rlien,d_size,i);
|
||||
allocate((unsigned long) gp[i].rljen,d_size,i);
|
||||
|
||||
allocate((unsigned long) q_multi[i],mg_size,i);
|
||||
allocate((unsigned long) rhs_multi[i],mg_size,i);
|
||||
allocate((unsigned long) &(gp[i]),sizeof(struct Global_Private),i);
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
t2a = (double **) oldga[procid];
|
||||
t2b = (double **) oldgb[procid];
|
||||
for (i=0;i<im;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
for (j=0;j<jm;j++) {
|
||||
t1a[j] = 0.0;
|
||||
t1b[j] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
firstcol = 1;
|
||||
lastcol = firstcol + gp[procid].rel_num_x[numlev-1] - 1;
|
||||
firstrow = 1;
|
||||
lastrow = firstrow + gp[procid].rel_num_y[numlev-1] - 1;
|
||||
numcols = gp[procid].rel_num_x[numlev-1];
|
||||
numrows = gp[procid].rel_num_y[numlev-1];
|
||||
j_off = gp[procid].colnum*numcols;
|
||||
|
||||
if (procid > nprocs/2) {
|
||||
psinum = 2;
|
||||
} else {
|
||||
psinum = 1;
|
||||
}
|
||||
|
||||
/* every process gets its own copy of the timing variables to avoid
|
||||
contention at shared memory locations. here, these variables
|
||||
are initialized. */
|
||||
|
||||
ttime = 0.0;
|
||||
dhour = 0.0;
|
||||
nstep = 0 ;
|
||||
day = 0.0;
|
||||
|
||||
ysca1 = 0.5*ysca;
|
||||
if (procid == MASTER) {
|
||||
t1a = (double *) f;
|
||||
for (iindex = 0;iindex<=jmx[numlev-1]-1;iindex++) {
|
||||
y = ((double) iindex)*res;
|
||||
t1a[iindex] = f0+beta*(y-ysca1);
|
||||
}
|
||||
}
|
||||
|
||||
t2a = (double **) psium[procid];
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[0][0]=0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[im-1][0]=0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[0][jm-1]=0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[im-1][jm-1]=0.0;
|
||||
}
|
||||
if (gp[procid].neighbors[UP] == -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[DOWN] == -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
t1a[iindex] = 0.0;
|
||||
}
|
||||
}
|
||||
t2a = (double **) psilm[procid];
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[0][0]=0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[im-1][0]=0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[0][jm-1]=0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[im-1][jm-1]=0.0;
|
||||
}
|
||||
if (gp[procid].neighbors[UP] == -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[DOWN] == -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
t1a[iindex] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
t2a = (double **) psib[procid];
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[0][0]=1.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[0][jm-1]=1.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[im-1][0]=1.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[im-1][jm-1]=1.0;
|
||||
}
|
||||
if (gp[procid].neighbors[UP] == -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = 1.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[DOWN] == -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = 1.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][0] = 1.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][jm-1] = 1.0;
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
t1a[iindex] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/* wait until all processes have completed the above initialization */
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->sl_prini,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
/* compute psib array (one-time computation) and integrate into psibi */
|
||||
|
||||
istart = 1;
|
||||
iend = istart + gp[procid].rel_num_y[numlev-1] - 1;
|
||||
jstart = 1;
|
||||
jend = jstart + gp[procid].rel_num_x[numlev-1] - 1;
|
||||
ist = istart;
|
||||
ien = iend;
|
||||
jst = jstart;
|
||||
jen = jend;
|
||||
|
||||
if (gp[procid].neighbors[UP] == -1) {
|
||||
istart = 0;
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] == -1) {
|
||||
jstart = 0;
|
||||
}
|
||||
if (gp[procid].neighbors[DOWN] == -1) {
|
||||
iend = im-1;
|
||||
}
|
||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
||||
jend = jm-1;
|
||||
}
|
||||
|
||||
t2a = (double **) rhs_multi[procid][numlev-1];
|
||||
t2b = (double **) psib[procid];
|
||||
for(i=istart;i<=iend;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
for(j=jstart;j<=jend;j++) {
|
||||
t1a[j] = t1b[j] * ressqr;
|
||||
}
|
||||
}
|
||||
t2a = (double **) q_multi[procid][numlev-1];
|
||||
if (gp[procid].neighbors[UP] == -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) t2b[0];
|
||||
for(j=jstart;j<=jend;j++) {
|
||||
t1a[j] = t1b[j];
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[DOWN] == -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
t1b = (double *) t2b[im-1];
|
||||
for(j=jstart;j<=jend;j++) {
|
||||
t1a[j] = t1b[j];
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] == -1) {
|
||||
for(i=istart;i<=iend;i++) {
|
||||
t2a[i][0] = t2b[i][0];
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
||||
for(i=istart;i<=iend;i++) {
|
||||
t2a[i][jm-1] = t2b[i][jm-1];
|
||||
}
|
||||
}
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->sl_psini,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
t2a = (double **) psib[procid];
|
||||
j = gp[procid].neighbors[UP];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) psib[j][im-2];
|
||||
for (i=1;i<jm-1;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[procid].neighbors[DOWN];
|
||||
if (j != -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
t1b = (double *) psib[j][1];
|
||||
for (i=1;i<jm-1;i++) {
|
||||
t1a[i] = t1b[i];
|
||||
}
|
||||
}
|
||||
j = gp[procid].neighbors[LEFT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) psib[j];
|
||||
for (i=1;i<im-1;i++) {
|
||||
t2a[i][0] = t2b[i][jm-2];
|
||||
}
|
||||
}
|
||||
j = gp[procid].neighbors[RIGHT];
|
||||
if (j != -1) {
|
||||
t2b = (double **) psib[j];
|
||||
for (i=1;i<im-1;i++) {
|
||||
t2a[i][jm-1] = t2b[i][1];
|
||||
}
|
||||
}
|
||||
|
||||
t2a = (double **) q_multi[procid][numlev-1];
|
||||
t2b = (double **) psib[procid];
|
||||
fac = 1.0 / (4.0 - ressqr*eig2);
|
||||
for(i=ist;i<=ien;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
t1c = (double *) t2b[i-1];
|
||||
t1d = (double *) t2b[i+1];
|
||||
for(j=jst;j<=jen;j++) {
|
||||
t1a[j] = fac * (t1d[j]+t1c[j]+t1b[j+1]+t1b[j-1] -
|
||||
ressqr*t1b[j]);
|
||||
}
|
||||
}
|
||||
|
||||
multig(procid);
|
||||
|
||||
for(i=istart;i<=iend;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
for(j=jstart;j<=jend;j++) {
|
||||
t1b[j] = t1a[j];
|
||||
}
|
||||
}
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->sl_prini,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
/* update the local running sum psibipriv by summing all the resulting
|
||||
values in that process's share of the psib matrix */
|
||||
|
||||
t2a = (double **) psib[procid];
|
||||
psibipriv=0.0;
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
psibipriv = psibipriv + 0.25*(t2a[0][0]);
|
||||
}
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
psibipriv = psibipriv + 0.25*(t2a[0][jm-1]);
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
psibipriv=psibipriv+0.25*(t2a[im-1][0]);
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
psibipriv=psibipriv+0.25*(t2a[im-1][jm-1]);
|
||||
}
|
||||
if (gp[procid].neighbors[UP] == -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
psibipriv = psibipriv + 0.5*t1a[j];
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[DOWN] == -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
psibipriv = psibipriv + 0.5*t1a[j];
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
psibipriv = psibipriv + 0.5*t2a[j][0];
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
psibipriv = psibipriv + 0.5*t2a[j][jm-1];
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
psibipriv = psibipriv + t1a[iindex];
|
||||
}
|
||||
}
|
||||
|
||||
/* update the shared variable psibi by summing all the psibiprivs
|
||||
of the individual processes into it. note that this combined
|
||||
private and shared sum method avoids accessing the shared
|
||||
variable psibi once for every element of the matrix. */
|
||||
|
||||
LOCK(locks->psibilock)
|
||||
global->psibi = global->psibi + psibipriv;
|
||||
UNLOCK(locks->psibilock)
|
||||
|
||||
/* initialize psim matrices
|
||||
|
||||
if there is more than one process, then split the processes
|
||||
between the two psim matrices; otherwise, let the single process
|
||||
work on one first and then the other */
|
||||
|
||||
for(psiindex=0;psiindex<=1;psiindex++) {
|
||||
t2a = (double **) psim[procid][psiindex];
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[0][0] = 0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[im-1][0] = 0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[0][jm-1] = 0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[im-1][jm-1] = 0.0;
|
||||
}
|
||||
if (gp[procid].neighbors[UP] == -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[DOWN] == -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
t1a[iindex] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* initialize psi matrices the same way */
|
||||
|
||||
for(psiindex=0;psiindex<=1;psiindex++) {
|
||||
t2a = (double **) psi[procid][psiindex];
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[0][0] = 0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[0][jm-1] = 0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[im-1][0] = 0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[im-1][jm-1] = 0.0;
|
||||
}
|
||||
if (gp[procid].neighbors[UP] == -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[DOWN] == -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
t1a[iindex] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* compute input curl of wind stress */
|
||||
|
||||
t2a = (double **) tauz[procid];
|
||||
ysca1 = .5*ysca;
|
||||
factor= -t0*pi/ysca1;
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[0][0] = 0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[im-1][0] = 0.0;
|
||||
}
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
sintemp = pi*((double) jm-1+j_off)*res/ysca1;
|
||||
sintemp = sin(sintemp);
|
||||
t2a[0][jm-1] = factor*sintemp;
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
sintemp = pi*((double) jm-1+j_off)*res/ysca1;
|
||||
sintemp = sin(sintemp);
|
||||
t2a[im-1][jm-1] = factor*sintemp;
|
||||
}
|
||||
if (gp[procid].neighbors[UP] == -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
sintemp = pi*((double) j+j_off)*res/ysca1;
|
||||
sintemp = sin(sintemp);
|
||||
curlt = factor*sintemp;
|
||||
t1a[j] = curlt;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[DOWN] == -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
sintemp = pi*((double) j+j_off)*res/ysca1;
|
||||
sintemp = sin(sintemp);
|
||||
curlt = factor*sintemp;
|
||||
t1a[j] = curlt;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
||||
sintemp = pi*((double) jm-1+j_off)*res/ysca1;
|
||||
sintemp = sin(sintemp);
|
||||
curlt = factor*sintemp;
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][jm-1] = curlt;
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
sintemp = pi*((double) iindex+j_off)*res/ysca1;
|
||||
sintemp = sin(sintemp);
|
||||
curlt = factor*sintemp;
|
||||
t1a[iindex] = curlt;
|
||||
}
|
||||
}
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->sl_onetime,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
|
||||
/***************************************************************
|
||||
one-time stuff over at this point
|
||||
***************************************************************/
|
||||
|
||||
while (!endflag) {
|
||||
while ((!dayflag) || (!dhourflag)) {
|
||||
dayflag = 0;
|
||||
dhourflag = 0;
|
||||
if (nstep == 1) {
|
||||
if (procid == MASTER) {
|
||||
CLOCK(global->trackstart)
|
||||
}
|
||||
if ((procid == MASTER) || (do_stats)) {
|
||||
CLOCK(t1);
|
||||
gp[procid].total_time = t1;
|
||||
gp[procid].multi_time = 0;
|
||||
}
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might reset the
|
||||
statistics that one is measuring about the parallel execution */
|
||||
}
|
||||
|
||||
slave2(procid,firstrow,lastrow,numrows,firstcol,lastcol,numcols);
|
||||
|
||||
/* update time and step number
|
||||
note that these time and step variables are private i.e. every
|
||||
process has its own copy and keeps track of its own time */
|
||||
|
||||
ttime = ttime + dtau;
|
||||
nstep = nstep + 1;
|
||||
day = ttime/86400.0;
|
||||
|
||||
if (day > ((double) outday0)) {
|
||||
dayflag = 1;
|
||||
iday = (long) day;
|
||||
dhour = dhour+dtau;
|
||||
if (dhour >= 86400.0) {
|
||||
dhourflag = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
dhour = 0.0;
|
||||
|
||||
t2a = (double **) psium[procid];
|
||||
t2b = (double **) psim[procid][0];
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[0][0] = t2a[0][0]+t2b[0][0];
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[im-1][0] = t2a[im-1][0]+t2b[im-1][0];
|
||||
}
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[0][jm-1] = t2a[0][jm-1]+t2b[0][jm-1];
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[im-1][jm-1] = t2a[im-1][jm-1] +
|
||||
t2b[im-1][jm-1];
|
||||
}
|
||||
if (gp[procid].neighbors[UP] == -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) t2b[0];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = t1a[j]+t1b[j];
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[DOWN] == -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
t1b = (double *) t2b[im-1];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = t1a[j] + t1b[j];
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][0] = t2a[j][0]+t2b[j][0];
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][jm-1] = t2a[j][jm-1] +
|
||||
t2b[j][jm-1];
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
t1a[iindex] = t1a[iindex] + t1b[iindex];
|
||||
}
|
||||
}
|
||||
|
||||
/* update values of psilm array to psilm + psim[2] */
|
||||
|
||||
t2a = (double **) psilm[procid];
|
||||
t2b = (double **) psim[procid][1];
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[0][0] = t2a[0][0]+t2b[0][0];
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
||||
t2a[im-1][0] = t2a[im-1][0]+t2b[im-1][0];
|
||||
}
|
||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[0][jm-1] = t2a[0][jm-1]+t2b[0][jm-1];
|
||||
}
|
||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
||||
t2a[im-1][jm-1] = t2a[im-1][jm-1] +
|
||||
t2b[im-1][jm-1];
|
||||
}
|
||||
if (gp[procid].neighbors[UP] == -1) {
|
||||
t1a = (double *) t2a[0];
|
||||
t1b = (double *) t2b[0];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = t1a[j]+t1b[j];
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[DOWN] == -1) {
|
||||
t1a = (double *) t2a[im-1];
|
||||
t1b = (double *) t2b[im-1];
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
t1a[j] = t1a[j]+t1b[j];
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[LEFT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][0] = t2a[j][0]+t2b[j][0];
|
||||
}
|
||||
}
|
||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
t2a[j][jm-1] = t2a[j][jm-1] + t2b[j][jm-1];
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
t1a = (double *) t2a[i];
|
||||
t1b = (double *) t2b[i];
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
t1a[iindex] = t1a[iindex] + t1b[iindex];
|
||||
}
|
||||
}
|
||||
if (iday >= (long) outday3) {
|
||||
endflag = 1;
|
||||
}
|
||||
}
|
||||
if ((procid == MASTER) || (do_stats)) {
|
||||
CLOCK(t1);
|
||||
gp[procid].total_time = t1-gp[procid].total_time;
|
||||
}
|
||||
}
|
||||
|
1205
splash2/codes/apps/ocean/contiguous_partitions/slave2.C
Normal file
1205
splash2/codes/apps/ocean/contiguous_partitions/slave2.C
Normal file
File diff suppressed because it is too large
Load diff
112
splash2/codes/apps/ocean/contiguous_partitions/subblock.C
Normal file
112
splash2/codes/apps/ocean/contiguous_partitions/subblock.C
Normal file
|
@ -0,0 +1,112 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
EXTERN_ENV
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "decs.h"
|
||||
|
||||
void subblock()
|
||||
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long k;
|
||||
long xportion;
|
||||
long xextra;
|
||||
long yportion;
|
||||
long yextra;
|
||||
long my_num;
|
||||
|
||||
/* Determine starting coord and number of points to process in */
|
||||
/* each direction */
|
||||
|
||||
for (i=0;i<numlev;i++) {
|
||||
xportion = (jmx[i] - 2) / xprocs;
|
||||
xextra = (jmx[i] - 2) % xprocs;
|
||||
for (j=0;j<xprocs;j++) {
|
||||
for (k=0;k<yprocs;k++) {
|
||||
gp[k*xprocs+j].rel_num_x[i] = xportion;
|
||||
}
|
||||
}
|
||||
yportion = (imx[i] - 2) / yprocs;
|
||||
yextra = (imx[i] - 2) % yprocs;
|
||||
for (j=0;j<yprocs;j++) {
|
||||
for (k=0;k<xprocs;k++) {
|
||||
gp[j*xprocs+k].rel_num_y[i] = yportion;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (my_num=0;my_num<nprocs;my_num++) {
|
||||
for (i=0;i<numlev;i++) {
|
||||
gp[my_num].rlist[i] = 1;
|
||||
gp[my_num].rljst[i] = 1;
|
||||
gp[my_num].rlien[i] = gp[my_num].rlist[i] + gp[my_num].rel_num_y[i];
|
||||
gp[my_num].rljen[i] = gp[my_num].rljst[i] + gp[my_num].rel_num_x[i];
|
||||
gp[my_num].eist[i] = gp[my_num].rlist[i] + 1;
|
||||
gp[my_num].oist[i] = gp[my_num].rlist[i];
|
||||
gp[my_num].ejst[i] = gp[my_num].rljst[i] + 1;
|
||||
gp[my_num].ojst[i] = gp[my_num].rljst[i];
|
||||
}
|
||||
}
|
||||
for (i=0;i<nprocs;i++) {
|
||||
gp[i].neighbors[LEFT] = -1;
|
||||
gp[i].neighbors[RIGHT] = -1;
|
||||
gp[i].neighbors[UP] = -1;
|
||||
gp[i].neighbors[DOWN] = -1;
|
||||
gp[i].neighbors[UPLEFT] = -1;
|
||||
gp[i].neighbors[UPRIGHT] = -1;
|
||||
gp[i].neighbors[DOWNLEFT] = -1;
|
||||
gp[i].neighbors[DOWNRIGHT] = -1;
|
||||
if (i >= xprocs) {
|
||||
gp[i].neighbors[UP] = i-xprocs;
|
||||
}
|
||||
if (i < nprocs-xprocs) {
|
||||
gp[i].neighbors[DOWN] = i+xprocs;
|
||||
}
|
||||
if ((i % xprocs) > 0) {
|
||||
gp[i].neighbors[LEFT] = i-1;
|
||||
}
|
||||
if ((i % xprocs) < (xprocs-1)) {
|
||||
gp[i].neighbors[RIGHT] = i+1;
|
||||
}
|
||||
j = gp[i].neighbors[UP];
|
||||
if (j != -1) {
|
||||
if ((j % xprocs) > 0) {
|
||||
gp[i].neighbors[UPLEFT] = j-1;
|
||||
}
|
||||
if ((j % xprocs) < (xprocs-1)) {
|
||||
gp[i].neighbors[UPRIGHT] = j+1;
|
||||
}
|
||||
}
|
||||
j = gp[i].neighbors[DOWN];
|
||||
if (j != -1) {
|
||||
if ((j % xprocs) > 0) {
|
||||
gp[i].neighbors[DOWNLEFT] = j-1;
|
||||
}
|
||||
if ((j % xprocs) < (xprocs-1)) {
|
||||
gp[i].neighbors[DOWNRIGHT] = j+1;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i=0;i<nprocs;i++) {
|
||||
gp[i].rownum = i/xprocs;
|
||||
gp[i].colnum = i%xprocs;
|
||||
}
|
||||
}
|
||||
|
13
splash2/codes/apps/ocean/non_contiguous_partitions/Makefile
Normal file
13
splash2/codes/apps/ocean/non_contiguous_partitions/Makefile
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Build description for the OCEAN program (non-contiguous partitions version).
# TARGET and OBJS are presumably consumed by the rules in the shared
# Makefile.config included below -- confirm there.
TARGET = OCEAN
|
||||
OBJS = jacobcalc.o laplacalc.o main.o multi.o slave1.o slave2.o
|
||||
|
||||
# Shared build rules/settings, three directories up.
include ../../../Makefile.config
|
||||
|
||||
# decs.h is produced from decs.H, and every generated .c file depends on it,
# so a change to decs.H causes all of them to be regenerated.
decs.h: decs.H
|
||||
jacobcalc.c: decs.h
|
||||
main.c: decs.h
|
||||
slave1.c: decs.h
|
||||
laplacalc.c: decs.h
|
||||
multi.c : decs.h
|
||||
slave2.c: decs.h
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
shmid 0x2f44 shmvaddr 0x500000
|
||||
|
||||
Ocean simulation with W-cycle multigrid solver
|
||||
Processors : 1
|
||||
Grid size : 258 x 258
|
||||
Grid resolution (meters) : 20000.00
|
||||
Time between relaxations (seconds) : 28800
|
||||
Error tolerance : 1e-07
|
||||
|
||||
MULTIGRID OUTPUTS
|
||||
iter 71, level 8, residual norm 8.00274594e-08, work = 33.875
|
||||
iter 31, level 8, residual norm 4.08062997e-08, work = 13.563
|
||||
iter 22, level 8, residual norm 5.94548243e-08, work = 9.438
|
||||
iter 12, level 8, residual norm 4.05573548e-08, work = 6.188
|
||||
iter 2, level 8, residual norm 8.20209761e-08, work = 2.000
|
||||
iter 5, level 8, residual norm 6.54258352e-08, work = 5.000
|
||||
iter 3, level 8, residual norm 7.23930444e-08, work = 3.000
|
||||
iter 12, level 8, residual norm 3.56346364e-08, work = 6.188
|
||||
iter 2, level 8, residual norm 5.93080936e-08, work = 2.000
|
||||
iter 4, level 8, residual norm 8.54596640e-08, work = 4.000
|
||||
iter 11, level 8, residual norm 3.70162668e-08, work = 6.125
|
||||
iter 13, level 8, residual norm 3.34750572e-08, work = 7.188
|
||||
iter 12, level 8, residual norm 2.45353138e-08, work = 6.188
|
||||
|
||||
PROCESS STATISTICS
|
||||
Total Multigrid Multigrid
|
||||
Proc Time Time Fraction
|
||||
0 79990673 21750269 0.272
|
||||
Avg 79990673 21750269 0.272
|
||||
Min 79990673 21750269 0.272
|
||||
Max 79990673 21750269 0.272
|
||||
|
||||
TIMING INFORMATION
|
||||
Start time : 972660212
|
||||
Initialization finish time : 1017890583
|
||||
Overall finish time : 1097881258
|
||||
Total time with initialization : 125221046
|
||||
Total time without initialization : 79990675
|
||||
(excludes first timestep)
|
||||
|
262
splash2/codes/apps/ocean/non_contiguous_partitions/decs.H
Normal file
262
splash2/codes/apps/ocean/non_contiguous_partitions/decs.H
Normal file
|
@ -0,0 +1,262 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/*
 * Compile-time limits and constants shared by all ocean source files.
 *   INPROCS     size of the per-processor arrays (e.g. multi_struct.spinflag)
 *   IMAX, JMAX  dimensions of every statically sized grid declared below
 *   MAX_LEVELS  size of the per-multigrid-level arrays
 *   MASTER      processor id compared against pid (see jacobcalc)
 *   PAGE_SIZE   size in bytes of the pad at the start of Global_Private
 * NOTE(review): RED_ITER / BLACK_ITER are presumably the values passed as the
 * `color` argument of relax() -- confirm against multi.C.
 */
#define INPROCS 16
|
||||
#define IMAX 258
|
||||
#define JMAX 258
|
||||
#define MAX_LEVELS 9
|
||||
#define MASTER 0
|
||||
#define RED_ITER 0
|
||||
#define BLACK_ITER 1
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
EXTERN_ENV
|
||||
|
||||
/*
 * Scalar bookkeeping values.  main() allocates one instance with G_MALLOC
 * and publishes it through the `global` pointer.
 * NOTE(review): idlock, psiailock and psibilock in locks_struct presumably
 * guard id, psiai and psibi respectively -- confirm against the slave code.
 */
struct global_struct {
|
||||
long id;
|
||||
long starttime;
|
||||
long trackstart;
|
||||
double psiai;
|
||||
double psibi;
|
||||
};
|
||||
|
||||
/*
 * Two pairs of IMAX x JMAX grids (psi and psim, each with a leading index
 * of size 2).  main() allocates one instance with G_MALLOC and publishes it
 * through the `fields` pointer.
 */
struct fields_struct {
|
||||
double psi[2][IMAX][JMAX];
|
||||
double psim[2][IMAX][JMAX];
|
||||
};
|
||||
|
||||
/*
 * Two IMAX x JMAX grids, psium and psilm.  main() allocates one instance
 * with G_MALLOC and publishes it through the `fields2` pointer.
 */
struct fields2_struct {
|
||||
double psium[IMAX][JMAX];
|
||||
double psilm[IMAX][JMAX];
|
||||
};
|
||||
|
||||
/*
 * Three IMAX x JMAX grids: psib, ga and gb.  main() allocates one instance
 * with G_MALLOC and publishes it through the `wrk1` pointer.
 */
struct wrk1_struct {
|
||||
double psib[IMAX][JMAX];
|
||||
double ga[IMAX][JMAX];
|
||||
double gb[IMAX][JMAX];
|
||||
};
|
||||
|
||||
/*
 * Work grids: work1 is a pair of IMAX x JMAX grids, work2 a single one.
 * main() allocates one instance with G_MALLOC and publishes it through the
 * `wrk3` pointer.
 */
struct wrk3_struct {
|
||||
double work1[2][IMAX][JMAX];
|
||||
double work2[IMAX][JMAX];
|
||||
};
|
||||
|
||||
/*
 * One IMAX x JMAX work grid (work3) plus a vector f with one entry per grid
 * row index (IMAX entries).  main() allocates one instance with G_MALLOC and
 * publishes it through the `wrk2` pointer.
 */
struct wrk2_struct {
|
||||
double work3[IMAX][JMAX];
|
||||
double f[IMAX];
|
||||
};
|
||||
|
||||
/*
 * Two pairs of IMAX x JMAX work grids, work4 and work5.  main() allocates
 * one instance with G_MALLOC and publishes it through the `wrk4` pointer.
 */
struct wrk4_struct {
|
||||
double work4[2][IMAX][JMAX];
|
||||
double work5[2][IMAX][JMAX];
|
||||
};
|
||||
|
||||
/*
 * A single IMAX x JMAX work grid, work6.  main() allocates one instance with
 * G_MALLOC and publishes it through the `wrk6` pointer.
 */
struct wrk6_struct {
|
||||
double work6[IMAX][JMAX];
|
||||
};
|
||||
|
||||
/*
 * Two pairs of IMAX x JMAX grids, work7 and temparray.  main() allocates one
 * instance with G_MALLOC and publishes it through the `wrk5` pointer.
 */
struct wrk5_struct {
|
||||
double work7[2][IMAX][JMAX];
|
||||
double temparray[2][IMAX][JMAX];
|
||||
};
|
||||
|
||||
/*
 * A single IMAX x JMAX grid, tauz.  main() allocates one instance with
 * G_MALLOC and publishes it through the `frcng` pointer.
 */
struct frcng_struct {
|
||||
double tauz[IMAX][JMAX];
|
||||
};
|
||||
|
||||
/*
 * A `notdone` flag plus two IMAX x JMAX work grids (work8, work9).  main()
 * allocates one instance with G_MALLOC and publishes it through the `iter`
 * pointer.
 * NOTE(review): donelock in locks_struct presumably guards notdone --
 * confirm against the code that updates it.
 */
struct iter_struct {
|
||||
long notdone;
|
||||
double work8[IMAX][JMAX];
|
||||
double work9[IMAX][JMAX];
|
||||
};
|
||||
|
||||
/*
 * Two IMAX x JMAX grids, oldga and oldgb.  main() allocates one instance
 * with G_MALLOC and publishes it through the `guess` pointer.
 */
struct guess_struct {
|
||||
double oldga[IMAX][JMAX];
|
||||
double oldgb[IMAX][JMAX];
|
||||
};
|
||||
|
||||
/*
 * Multigrid data: one IMAX x JMAX grid per level for q_multi and rhs_multi,
 * plus err_multi, numspin and a per-processor spinflag array.  main()
 * allocates one instance with G_MALLOC (pointer `multi`), sets err_multi and
 * numspin to 0 and clears spinflag[0 .. nprocs-1].
 * NOTE(review): spinflag has INPROCS entries, but the -p option parsing in
 * main() only checks that nprocs is >= 1 and a power of two -- confirm that
 * nprocs <= INPROCS is enforced somewhere.
 */
struct multi_struct {
|
||||
double q_multi[MAX_LEVELS][IMAX][JMAX];
|
||||
double rhs_multi[MAX_LEVELS][IMAX][JMAX];
|
||||
double err_multi;
|
||||
long numspin;
|
||||
long spinflag[INPROCS];
|
||||
};
|
||||
|
||||
/*
 * All locks used by the program, declared through the LOCKDEC macro.  main()
 * allocates one instance with G_MALLOC (pointer `locks`) and initialises
 * each member with LOCKINIT.
 */
struct locks_struct {
|
||||
LOCKDEC(idlock)
|
||||
LOCKDEC(psiailock)
|
||||
LOCKDEC(psibilock)
|
||||
LOCKDEC(donelock)
|
||||
LOCKDEC(error_lock)
|
||||
LOCKDEC(bar_lock)
|
||||
};
|
||||
|
||||
/*
 * All barriers used by the program, declared through the BARDEC macro.
 * With MULTIPLE_BARRIERS defined there is one barrier per named
 * synchronisation point; otherwise a single member `barrier` is declared.
 * main() allocates one instance with G_MALLOC (pointer `bars`) and
 * initialises each member with BARINIT(..., nprocs) under the same #if.
 */
struct bars_struct {
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARDEC(iteration)
|
||||
BARDEC(gsudn)
|
||||
BARDEC(p_setup)
|
||||
BARDEC(p_redph)
|
||||
BARDEC(p_soln)
|
||||
BARDEC(p_subph)
|
||||
BARDEC(sl_prini)
|
||||
BARDEC(sl_psini)
|
||||
BARDEC(sl_onetime)
|
||||
BARDEC(sl_phase_1)
|
||||
BARDEC(sl_phase_2)
|
||||
BARDEC(sl_phase_3)
|
||||
BARDEC(sl_phase_4)
|
||||
BARDEC(sl_phase_5)
|
||||
BARDEC(sl_phase_6)
|
||||
BARDEC(sl_phase_7)
|
||||
BARDEC(sl_phase_8)
|
||||
BARDEC(sl_phase_9)
|
||||
BARDEC(sl_phase_10)
|
||||
BARDEC(error_barrier)
|
||||
#else
|
||||
BARDEC(barrier)
|
||||
#endif
|
||||
};
|
||||
|
||||
extern struct global_struct *global;
|
||||
extern struct fields_struct *fields;
|
||||
extern struct fields2_struct *fields2;
|
||||
extern struct wrk1_struct *wrk1;
|
||||
extern struct wrk3_struct *wrk3;
|
||||
extern struct wrk2_struct *wrk2;
|
||||
extern struct wrk4_struct *wrk4;
|
||||
extern struct wrk6_struct *wrk6;
|
||||
extern struct wrk5_struct *wrk5;
|
||||
extern struct frcng_struct *frcng;
|
||||
extern struct iter_struct *iter;
|
||||
extern struct guess_struct *guess;
|
||||
extern struct multi_struct *multi;
|
||||
extern struct locks_struct *locks;
|
||||
extern struct bars_struct *bars;
|
||||
|
||||
extern double eig2;
|
||||
extern double ysca;
|
||||
extern long jmm1;
|
||||
extern double pi;
|
||||
extern double t0;
|
||||
|
||||
extern long *procmap;
|
||||
extern long xprocs;
|
||||
extern long yprocs;
|
||||
|
||||
extern long numlev;
|
||||
extern long imx[MAX_LEVELS];
|
||||
extern long jmx[MAX_LEVELS];
|
||||
extern double lev_res[MAX_LEVELS];
|
||||
extern double lev_tol[MAX_LEVELS];
|
||||
extern double maxwork;
|
||||
extern long minlevel;
|
||||
extern double outday0;
|
||||
extern double outday1;
|
||||
extern double outday2;
|
||||
extern double outday3;
|
||||
|
||||
extern long nprocs;
|
||||
|
||||
extern double h1;
|
||||
extern double h3;
|
||||
extern double h;
|
||||
extern double lf;
|
||||
extern double res;
|
||||
extern double dtau;
|
||||
extern double f0;
|
||||
extern double beta;
|
||||
extern double gpr;
|
||||
extern long im;
|
||||
extern long jm;
|
||||
extern long do_stats;
|
||||
extern long do_output;
|
||||
extern long *multi_times;
|
||||
extern long *total_times;
|
||||
extern double factjacob;
|
||||
extern double factlap;
|
||||
|
||||
/*
 * Per-processor data; main() allocates an array of nprocs+1 of these with
 * G_MALLOC (pointer `gp`) and indexes it by processor id.
 *
 *   pad                      PAGE_SIZE bytes placed before the real fields
 *   multi_time, total_time   timing accumulators, zeroed in main()
 *   rel_start_x/y, rel_num_x/y
 *                            per-level start index and number of points
 *                            assigned to the processor, filled in by main()
 *   remaining arrays         further per-level index bounds, one entry per
 *                            multigrid level
 * NOTE(review): the pad presumably keeps each processor's entry on its own
 * page(s) -- confirm the intent.
 */
struct Global_Private {
|
||||
char pad[PAGE_SIZE];
|
||||
double multi_time;
|
||||
double total_time;
|
||||
long rel_start_x[MAX_LEVELS];
|
||||
long rel_start_y[MAX_LEVELS];
|
||||
long rel_num_x[MAX_LEVELS];
|
||||
long rel_num_y[MAX_LEVELS];
|
||||
long eist[MAX_LEVELS];
|
||||
long ejst[MAX_LEVELS];
|
||||
long oist[MAX_LEVELS];
|
||||
long ojst[MAX_LEVELS];
|
||||
long eiest[MAX_LEVELS];
|
||||
long ejest[MAX_LEVELS];
|
||||
long oiest[MAX_LEVELS];
|
||||
long ojest[MAX_LEVELS];
|
||||
long rlist[MAX_LEVELS];
|
||||
long rljst[MAX_LEVELS];
|
||||
long rlien[MAX_LEVELS];
|
||||
long rljen[MAX_LEVELS];
|
||||
long iist[MAX_LEVELS];
|
||||
long ijst[MAX_LEVELS];
|
||||
long iien[MAX_LEVELS];
|
||||
long ijen[MAX_LEVELS];
|
||||
long pist[MAX_LEVELS];
|
||||
long pjst[MAX_LEVELS];
|
||||
long pien[MAX_LEVELS];
|
||||
long pjen[MAX_LEVELS];
|
||||
};
|
||||
|
||||
extern struct Global_Private *gp;
|
||||
|
||||
extern double i_int_coeff[MAX_LEVELS];
|
||||
extern double j_int_coeff[MAX_LEVELS];
|
||||
extern long minlev;
|
||||
|
||||
/*
|
||||
* jacobcalc.C
|
||||
*/
|
||||
void jacobcalc(double x[IMAX][JMAX], double y[IMAX][JMAX], double z[IMAX][JMAX], long pid, long firstrow, long lastrow, long firstcol, long lastcol, long numrows, long numcols);
|
||||
|
||||
/*
|
||||
* laplacalc.C
|
||||
*/
|
||||
void laplacalc(double x[IMAX][JMAX], double z[IMAX][JMAX], long firstrow, long lastrow, long firstcol, long lastcol, long numrows, long numcols);
|
||||
|
||||
/*
|
||||
* main.C
|
||||
*/
|
||||
long log_2(long number);
|
||||
void printerr(char *s);
|
||||
|
||||
/*
|
||||
* multi.C
|
||||
*/
|
||||
void multig(long my_id);
|
||||
void relax(long k, double *err, long color, long my_num);
|
||||
void rescal(long kf, long my_num);
|
||||
void intadd(long kc, long my_num);
|
||||
void putz(long k, long my_num);
|
||||
|
||||
/*
|
||||
* slave1.C
|
||||
*/
|
||||
void slave(void);
|
||||
|
||||
/*
|
||||
* slave2.C
|
||||
*/
|
||||
void slave2(long procid, long firstrow, long lastrow, long numrows, long firstcol, long lastcol, long numcols);
|
|
@ -0,0 +1,98 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/* does the arakawa jacobian calculation (of the x and y matrices,
|
||||
putting the results in the z matrix) for a subblock. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include "decs.h"
|
||||
|
||||
void jacobcalc(double x[IMAX][JMAX], double y[IMAX][JMAX], double z[IMAX][JMAX], long pid, long firstrow, long lastrow, long firstcol, long lastcol, long numrows, long numcols)
|
||||
{
|
||||
double f1;
|
||||
double f2;
|
||||
double f3;
|
||||
double f4;
|
||||
double f5;
|
||||
double f6;
|
||||
double f7;
|
||||
double f8;
|
||||
long iindex;
|
||||
long indexp1;
|
||||
long indexm1;
|
||||
long im1;
|
||||
long ip1;
|
||||
long i;
|
||||
long j;
|
||||
|
||||
if (pid == MASTER) {
|
||||
z[0][0]=0.0;
|
||||
}
|
||||
if (pid == nprocs-xprocs) {
|
||||
z[im-1][0]=0.0;
|
||||
}
|
||||
if (pid == xprocs-1) {
|
||||
z[0][jm-1]=0.0;
|
||||
}
|
||||
if (pid == nprocs-1) {
|
||||
z[im-1][jm-1]=0.0;
|
||||
}
|
||||
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
indexp1 = iindex+1;
|
||||
indexm1 = iindex-1;
|
||||
for (i=firstrow;i<=lastrow;i++) {
|
||||
ip1 = i+1;
|
||||
im1 = i-1;
|
||||
f1 = (y[i][indexm1]+y[ip1][indexm1]-y[i][indexp1]-y[ip1][indexp1])*
|
||||
(x[ip1][iindex]-x[i][iindex]);
|
||||
f2 = (y[im1][indexm1]+y[i][indexm1]-y[im1][indexp1]-y[i][indexp1])*
|
||||
(x[i][iindex]-x[im1][iindex]);
|
||||
f3 = (y[ip1][iindex]+y[ip1][indexp1]-y[im1][iindex]-y[im1][indexp1])*
|
||||
(x[i][indexp1]-x[i][iindex]);
|
||||
f4 = (y[ip1][indexm1]+y[ip1][iindex]-y[im1][indexm1]-y[im1][iindex])*
|
||||
(x[i][iindex]-x[i][indexm1]);
|
||||
f5 = (y[ip1][iindex]-y[i][indexp1])*(x[ip1][indexp1]-x[i][iindex]);
|
||||
f6 = (y[i][indexm1]-y[im1][iindex])*(x[i][iindex]-x[im1][indexm1]);
|
||||
f7 = (y[i][indexp1]-y[im1][iindex])*(x[im1][indexp1]-x[i][iindex]);
|
||||
f8 = (y[ip1][iindex]-y[i][indexm1])*(x[i][iindex]-x[ip1][indexm1]);
|
||||
|
||||
z[i][iindex] = factjacob*(f1+f2+f3+f4+f5+f6+f7+f8);
|
||||
}
|
||||
}
|
||||
if (firstrow == 1) {
|
||||
for (j=firstcol;j<=lastcol;j++) {
|
||||
z[0][j] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstrow+numrows) == im-1) {
|
||||
for (j=firstcol;j<=lastcol;j++) {
|
||||
z[im-1][j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (firstcol == 1) {
|
||||
for (j=firstrow;j<=lastrow;j++) {
|
||||
z[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstcol+numcols) == jm-1) {
|
||||
for (j=firstrow;j<=lastrow;j++) {
|
||||
z[j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/* **************************************************************
|
||||
end of subroutine jacobcalc
|
||||
**************************************************************
|
||||
|
||||
performs the laplacian calculation for a subblock. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include "decs.h"
|
||||
|
||||
void laplacalc(double x[IMAX][JMAX], double z[IMAX][JMAX], long firstrow, long lastrow, long firstcol, long lastcol, long numrows, long numcols)
|
||||
{
|
||||
long iindex;
|
||||
long indexp1;
|
||||
long indexm1;
|
||||
long ip1;
|
||||
long im1;
|
||||
long i;
|
||||
long j;
|
||||
|
||||
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
indexp1 = iindex+1;
|
||||
indexm1 = iindex-1;
|
||||
for (i=firstrow;i<=lastrow;i++) {
|
||||
ip1 = i+1;
|
||||
im1 = i-1;
|
||||
z[i][iindex] = factlap*(x[ip1][iindex]+x[im1][iindex]+x[i][indexp1]+
|
||||
x[i][indexm1]-4.*x[i][iindex]);
|
||||
}
|
||||
}
|
||||
if (firstrow == 1) {
|
||||
for (j=firstcol;j<=lastcol;j++) {
|
||||
z[0][j] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstrow+numrows) == im-1) {
|
||||
for (j=firstcol;j<=lastcol;j++) {
|
||||
z[im-1][j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (firstcol == 1) {
|
||||
for (j=firstrow;j<=lastrow;j++) {
|
||||
z[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstcol+numcols) == jm-1) {
|
||||
for (j=firstrow;j<=lastrow;j++) {
|
||||
z[j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
BIN
splash2/codes/apps/ocean/non_contiguous_partitions/libpthread.a
Normal file
BIN
splash2/codes/apps/ocean/non_contiguous_partitions/libpthread.a
Normal file
Binary file not shown.
BIN
splash2/codes/apps/ocean/non_contiguous_partitions/m5op_x86.o
Normal file
BIN
splash2/codes/apps/ocean/non_contiguous_partitions/m5op_x86.o
Normal file
Binary file not shown.
534
splash2/codes/apps/ocean/non_contiguous_partitions/main.C
Normal file
534
splash2/codes/apps/ocean/non_contiguous_partitions/main.C
Normal file
|
@ -0,0 +1,534 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* SPLASH Ocean Code */
|
||||
/* */
|
||||
/* This application studies the role of eddy and boundary currents in */
|
||||
/* influencing large-scale ocean movements. This implementation uses */
|
||||
/* statically allocated two-dimensional arrays for grid data storage. */
|
||||
/* */
|
||||
/* Command line options: */
|
||||
/* */
|
||||
/* -nN : Simulate NxN ocean. N must be (power of 2)+2. */
|
||||
/* -pP : P = number of processors. P must be power of 2. */
|
||||
/* -eE : E = error tolerance for iterative relaxation. */
|
||||
/* -rR : R = distance between grid points in meters. */
|
||||
/* -tT : T = timestep in seconds. */
|
||||
/* -s : Print timing statistics. */
|
||||
/* -o : Print out relaxation residual values. */
|
||||
/* -h : Print out command line options. */
|
||||
/* */
|
||||
/* Default: OCEAN -n258 -p1 -e1e-7 -r20000.0 -t28800.0 */
|
||||
/* */
|
||||
/* NOTE: This code works under both the FORK and SPROC models. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include "decs.h"
|
||||
|
||||
MAIN_ENV
|
||||
|
||||
/* Default values for the command-line options parsed in main(). */
#define DEFAULT_N 258      /* -n : grid size (N x N) */
#define DEFAULT_P 1        /* -p : number of processors */
#define DEFAULT_E 1e-7     /* -e : error tolerance */
#define DEFAULT_T 28800.0  /* -t : timestep in seconds */
#define DEFAULT_R 20000.0  /* -r : distance between grid points in meters */

/*
 * INPROCS (maximum number of processors), IMAX, JMAX, MAX_LEVELS and
 * PAGE_SIZE come from decs.h, which this file includes; they are not
 * repeated here so the two copies cannot drift apart.
 */
|
||||
|
||||
/*
 * Pointers to the program's data structures (declared extern in decs.h).
 * Each one is set in main() to a single instance obtained from G_MALLOC;
 * the members of *locks and *bars are then initialised with LOCKINIT and
 * BARINIT.
 */
struct global_struct *global;
|
||||
struct fields_struct *fields;
|
||||
struct fields2_struct *fields2;
|
||||
struct wrk1_struct *wrk1;
|
||||
struct wrk3_struct *wrk3;
|
||||
struct wrk2_struct *wrk2;
|
||||
struct wrk4_struct *wrk4;
|
||||
struct wrk6_struct *wrk6;
|
||||
struct wrk5_struct *wrk5;
|
||||
struct frcng_struct *frcng;
|
||||
struct iter_struct *iter;
|
||||
struct guess_struct *guess;
|
||||
struct multi_struct *multi;
|
||||
struct locks_struct *locks;
|
||||
struct bars_struct *bars;
|
||||
|
||||
/*
 * Global parameters and derived values.  Those with an initialiser are
 * defaults; several are overwritten by the option parsing in main().
 * Variables without a trailing comment are not set in the visible part of
 * main(); their meaning should be taken from the code that uses them.
 */
long startcol[2][INPROCS];
|
||||
long nprocs = DEFAULT_P; /* number of processors, set by -p */
|
||||
long startrow[2][INPROCS];
|
||||
double h1 = 1000.0;
|
||||
double h3 = 4000.0;
|
||||
double h = 5000.0;
|
||||
double lf = -5.12e11;
|
||||
double eps = 0;
|
||||
double res = DEFAULT_R; /* grid resolution in meters, set by -r */
|
||||
double dtau = DEFAULT_T; /* timestep in seconds, set by -t */
|
||||
double f0 = 8.3e-5;
|
||||
double beta = 2.0e-11;
|
||||
double gpr = 0.02;
|
||||
long im = DEFAULT_N; /* grid size, set by -n; must not exceed IMAX */
|
||||
long jm; /* set equal to im in main() */
|
||||
double tolerance = DEFAULT_E; /* error tolerance, set by -e */
|
||||
double eig2;
|
||||
double ysca;
|
||||
long jmm1;
|
||||
double pi;
|
||||
double t0 = 0.5e-4 ;
|
||||
double outday0 = 1.0;
|
||||
double outday1 = 2.0;
|
||||
double outday2 = 2.0;
|
||||
double outday3 = 2.0;
|
||||
double factjacob; /* scale factor applied in jacobcalc() */
|
||||
double factlap; /* scale factor applied in laplacalc() */
|
||||
long numlev; /* number of multigrid levels, derived from im-2 in main() */
|
||||
long minlev;
|
||||
long imx[MAX_LEVELS]; /* grid size per level; finest level is index numlev-1 */
|
||||
long jmx[MAX_LEVELS]; /* grid size per level; finest level is index numlev-1 */
|
||||
double lev_res[MAX_LEVELS]; /* resolution per level; doubles with each coarser level */
|
||||
double lev_tol[MAX_LEVELS]; /* tolerance per level; main() sets the finest level to `tolerance` */
|
||||
double maxwork = 10000.0;
|
||||
|
||||
struct Global_Private *gp; /* per-processor data; nprocs+1 entries allocated in main() */
|
||||
|
||||
double i_int_coeff[MAX_LEVELS];
|
||||
double j_int_coeff[MAX_LEVELS];
|
||||
long xprocs; /* processor-grid factors chosen in main() so that xprocs*yprocs == nprocs */
|
||||
long yprocs; /* and xprocs <= yprocs */
|
||||
long do_stats = 0; /* toggled by -s: print timing statistics */
|
||||
long do_output = 0; /* toggled by -o: print relaxation residual values */
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long xextra;
|
||||
long xportion;
|
||||
long yextra;
|
||||
long yportion;
|
||||
long lower;
|
||||
double procsqrt;
|
||||
long k;
|
||||
long logtest;
|
||||
long my_num;
|
||||
unsigned long computeend;
|
||||
double min_total;
|
||||
double max_total;
|
||||
double avg_total;
|
||||
double min_multi;
|
||||
double max_multi;
|
||||
double avg_multi;
|
||||
double min_frac;
|
||||
double max_frac;
|
||||
double avg_frac;
|
||||
extern char *optarg;
|
||||
long ch;
|
||||
unsigned long start;
|
||||
|
||||
CLOCK(start)
|
||||
|
||||
while ((ch = getopt(argc, argv, "n:p:e:r:t:soh")) != -1) {
|
||||
switch(ch) {
|
||||
case 'n': im = atoi(optarg);
|
||||
if (im > IMAX) {
|
||||
printerr("Max grid size exceeded\n");
|
||||
exit(-1);
|
||||
}
|
||||
if (log_2(im-2) == -1) {
|
||||
printerr("Grid must be ((power of 2)+2) in each dimension\n");
|
||||
exit(-1);
|
||||
}
|
||||
break;
|
||||
case 'p': nprocs = atoi(optarg);
|
||||
if (nprocs < 1) {
|
||||
printerr("P must be >= 1\n");
|
||||
exit(-1);
|
||||
}
|
||||
if (log_2(nprocs) == -1) {
|
||||
printerr("P must be a power of 2\n");
|
||||
exit(-1);
|
||||
}
|
||||
break;
|
||||
case 'e': tolerance = atof(optarg); break;
|
||||
case 'r': res = atof(optarg); break;
|
||||
case 't': dtau = atof(optarg); break;
|
||||
case 's': do_stats = !do_stats; break;
|
||||
case 'o': do_output = !do_output; break;
|
||||
case 'h': printf("Usage: OCEAN <options>\n\n");
|
||||
printf("options:\n");
|
||||
printf(" -nN : Simulate NxN ocean. N must be (power of 2)+2.\n");
|
||||
printf(" -pP : P = number of processors. P must be power of 2.\n");
|
||||
printf(" -eE : E = error tolerance for iterative relaxation.\n");
|
||||
printf(" -rR : R = distance between grid points in meters.\n");
|
||||
printf(" -tT : T = timestep in seconds.\n");
|
||||
printf(" -s : Print timing statistics.\n");
|
||||
printf(" -o : Print out relaxation residual values.\n");
|
||||
printf(" -h : Print out command line options.\n\n");
|
||||
printf("Default: OCEAN -n%1d -p%1d -e%1g -r%1g -t%1g\n",
|
||||
DEFAULT_N,DEFAULT_P,DEFAULT_E,DEFAULT_R,DEFAULT_T);
|
||||
exit(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
MAIN_INITENV(,60000000)
|
||||
|
||||
logtest = im-2;
|
||||
numlev = 1;
|
||||
while (logtest != 1) {
|
||||
if (logtest%2 != 0) {
|
||||
printerr("Cannot determine number of multigrid levels\n");
|
||||
exit(-1);
|
||||
}
|
||||
logtest = logtest / 2;
|
||||
numlev++;
|
||||
}
|
||||
|
||||
if (numlev > MAX_LEVELS) {
|
||||
printerr("Max grid levels exceeded for multigrid\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
jm = im;
|
||||
printf("\n");
|
||||
printf("Ocean simulation with W-cycle multigrid solver\n");
|
||||
printf(" Processors : %1ld\n",nprocs);
|
||||
printf(" Grid size : %1ld x %1ld\n",im,jm);
|
||||
printf(" Grid resolution (meters) : %0.2f\n",res);
|
||||
printf(" Time between relaxations (seconds) : %0.0f\n",dtau);
|
||||
printf(" Error tolerance : %0.7g\n",tolerance);
|
||||
printf("\n");
|
||||
|
||||
gp = (struct Global_Private *) G_MALLOC((nprocs+1)*sizeof(struct Global_Private));
|
||||
for (i=0;i<nprocs;i++) {
|
||||
gp[i].multi_time = 0;
|
||||
gp[i].total_time = 0;
|
||||
}
|
||||
global = (struct global_struct *) G_MALLOC(sizeof(struct global_struct));
|
||||
fields = (struct fields_struct *) G_MALLOC(sizeof(struct fields_struct));
|
||||
fields2 = (struct fields2_struct *) G_MALLOC(sizeof(struct fields2_struct));
|
||||
wrk1 = (struct wrk1_struct *) G_MALLOC(sizeof(struct wrk1_struct));
|
||||
wrk3 = (struct wrk3_struct *) G_MALLOC(sizeof(struct wrk3_struct));
|
||||
wrk2 = (struct wrk2_struct *) G_MALLOC(sizeof(struct wrk2_struct));
|
||||
wrk4 = (struct wrk4_struct *) G_MALLOC(sizeof(struct wrk4_struct));
|
||||
wrk6 = (struct wrk6_struct *) G_MALLOC(sizeof(struct wrk6_struct));
|
||||
wrk5 = (struct wrk5_struct *) G_MALLOC(sizeof(struct wrk5_struct));
|
||||
frcng = (struct frcng_struct *) G_MALLOC(sizeof(struct frcng_struct));
|
||||
iter = (struct iter_struct *) G_MALLOC(sizeof(struct iter_struct));
|
||||
guess = (struct guess_struct *) G_MALLOC(sizeof(struct guess_struct));
|
||||
multi = (struct multi_struct *) G_MALLOC(sizeof(struct multi_struct));
|
||||
locks = (struct locks_struct *) G_MALLOC(sizeof(struct locks_struct));
|
||||
bars = (struct bars_struct *) G_MALLOC(sizeof(struct bars_struct));
|
||||
|
||||
LOCKINIT(locks->idlock)
|
||||
LOCKINIT(locks->psiailock)
|
||||
LOCKINIT(locks->psibilock)
|
||||
LOCKINIT(locks->donelock)
|
||||
LOCKINIT(locks->error_lock)
|
||||
LOCKINIT(locks->bar_lock)
|
||||
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARINIT(bars->iteration, nprocs)
|
||||
BARINIT(bars->gsudn, nprocs)
|
||||
BARINIT(bars->p_setup, nprocs)
|
||||
BARINIT(bars->p_redph, nprocs)
|
||||
BARINIT(bars->p_soln, nprocs)
|
||||
BARINIT(bars->p_subph, nprocs)
|
||||
BARINIT(bars->sl_prini, nprocs)
|
||||
BARINIT(bars->sl_psini, nprocs)
|
||||
BARINIT(bars->sl_onetime, nprocs)
|
||||
BARINIT(bars->sl_phase_1, nprocs)
|
||||
BARINIT(bars->sl_phase_2, nprocs)
|
||||
BARINIT(bars->sl_phase_3, nprocs)
|
||||
BARINIT(bars->sl_phase_4, nprocs)
|
||||
BARINIT(bars->sl_phase_5, nprocs)
|
||||
BARINIT(bars->sl_phase_6, nprocs)
|
||||
BARINIT(bars->sl_phase_7, nprocs)
|
||||
BARINIT(bars->sl_phase_8, nprocs)
|
||||
BARINIT(bars->sl_phase_9, nprocs)
|
||||
BARINIT(bars->sl_phase_10, nprocs)
|
||||
BARINIT(bars->error_barrier, nprocs)
|
||||
#else
|
||||
BARINIT(bars->barrier, nprocs)
|
||||
#endif
|
||||
|
||||
imx[numlev-1] = im;
|
||||
jmx[numlev-1] = jm;
|
||||
lev_res[numlev-1] = res;
|
||||
lev_tol[numlev-1] = tolerance;
|
||||
multi->err_multi = 0.0;
|
||||
multi->numspin = 0;
|
||||
for (i=0;i<nprocs;i++) {
|
||||
multi->spinflag[i] = 0;
|
||||
}
|
||||
|
||||
for (i=numlev-2;i>=0;i--) {
|
||||
imx[i] = ((imx[i+1] - 2) / 2) + 2;
|
||||
jmx[i] = ((jmx[i+1] - 2) / 2) + 2;
|
||||
lev_res[i] = lev_res[i+1] * 2;
|
||||
}
|
||||
|
||||
xprocs = 0;
|
||||
yprocs = 0;
|
||||
procsqrt = sqrt((double) nprocs);
|
||||
j = (long) procsqrt;
|
||||
while ((xprocs == 0) && (j > 0)) {
|
||||
k = nprocs / j;
|
||||
if (k * j == nprocs) {
|
||||
if (k > j) {
|
||||
xprocs = j;
|
||||
yprocs = k;
|
||||
} else {
|
||||
xprocs = k;
|
||||
yprocs = j;
|
||||
}
|
||||
}
|
||||
j--;
|
||||
}
|
||||
if (xprocs == 0) {
|
||||
printerr("Could not find factors for subblocking\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/* Determine starting coord and number of points to process in */
|
||||
/* each direction */
|
||||
|
||||
for (i=0;i<numlev;i++) {
|
||||
xportion = (jmx[i] - 2) / xprocs;
|
||||
xextra = (jmx[i] - 2) % xprocs;
|
||||
for (j=0;j<xprocs;j++) {
|
||||
if (xextra == 0) {
|
||||
for (k=0;k<yprocs;k++) {
|
||||
gp[k*xprocs+j].rel_start_x[i] = j * xportion + 1;
|
||||
gp[k*xprocs+j].rel_num_x[i] = xportion;
|
||||
}
|
||||
} else {
|
||||
if (j + 1 > xextra) {
|
||||
for (k=0;k<yprocs;k++) {
|
||||
lower = xextra * (xportion + 1);
|
||||
gp[k*xprocs+j].rel_start_x[i] = lower + (j - xextra) * xportion + 1;
|
||||
gp[k*xprocs+j].rel_num_x[i] = xportion;
|
||||
}
|
||||
} else {
|
||||
for (k=0;k<yprocs;k++) {
|
||||
gp[k*xprocs+j].rel_start_x[i] = j * (xportion + 1) + 1;
|
||||
gp[k*xprocs+j].rel_num_x[i] = xportion + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
yportion = (imx[i] - 2) / yprocs;
|
||||
yextra = (imx[i] - 2) % yprocs;
|
||||
for (j=0;j<yprocs;j++) {
|
||||
if (yextra == 0) {
|
||||
for (k=0;k<xprocs;k++) {
|
||||
gp[j*xprocs+k].rel_start_y[i] = j * yportion + 1;
|
||||
gp[j*xprocs+k].rel_num_y[i] = yportion;
|
||||
}
|
||||
} else {
|
||||
if (j + 1 > yextra) {
|
||||
for (k=0;k<xprocs;k++) {
|
||||
lower = yextra * (yportion + 1);
|
||||
gp[j*xprocs+k].rel_start_y[i] = lower + (j - yextra) * yportion + 1;
|
||||
gp[j*xprocs+k].rel_num_y[i] = yportion;
|
||||
}
|
||||
} else {
|
||||
for (k=0;k<xprocs;k++) {
|
||||
gp[j*xprocs+k].rel_start_y[i] = j * (yportion + 1) + 1;
|
||||
gp[j*xprocs+k].rel_num_y[i] = yportion + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i_int_coeff[0] = 0.0;
|
||||
j_int_coeff[0] = 0.0;
|
||||
for (i=0;i<numlev;i++) {
|
||||
i_int_coeff[i] = 1.0/(imx[i]-1);
|
||||
j_int_coeff[i] = 1.0/(jmx[i]-1);
|
||||
}
|
||||
|
||||
for (my_num=0;my_num<nprocs;my_num++) {
|
||||
for (i=0;i<numlev;i++) {
|
||||
gp[my_num].rlist[i] = gp[my_num].rel_start_y[i];
|
||||
gp[my_num].rljst[i] = gp[my_num].rel_start_x[i];
|
||||
gp[my_num].rlien[i] = gp[my_num].rlist[i] + gp[my_num].rel_num_y[i] - 1;
|
||||
gp[my_num].rljen[i] = gp[my_num].rljst[i] + gp[my_num].rel_num_x[i] - 1;
|
||||
gp[my_num].iist[i] = gp[my_num].rel_start_y[i];
|
||||
gp[my_num].ijst[i] = gp[my_num].rel_start_x[i];
|
||||
gp[my_num].iien[i] = gp[my_num].iist[i] + gp[my_num].rel_num_y[i] - 1;
|
||||
gp[my_num].ijen[i] = gp[my_num].ijst[i] + gp[my_num].rel_num_x[i] - 1;
|
||||
gp[my_num].pist[i] = gp[my_num].rel_start_y[i];
|
||||
gp[my_num].pjst[i] = gp[my_num].rel_start_x[i];
|
||||
gp[my_num].pien[i] = gp[my_num].pist[i] + gp[my_num].rel_num_y[i] - 1;
|
||||
gp[my_num].pjen[i] = gp[my_num].pjst[i] + gp[my_num].rel_num_x[i] - 1;
|
||||
|
||||
if (gp[my_num].pist[i] == 1) {
|
||||
gp[my_num].pist[i] = 0;
|
||||
}
|
||||
if (gp[my_num].pjst[i] == 1) {
|
||||
gp[my_num].pjst[i] = 0;
|
||||
}
|
||||
if (gp[my_num].pien[i] == imx[i] - 2) {
|
||||
gp[my_num].pien[i] = imx[i]-1;
|
||||
}
|
||||
if (gp[my_num].pjen[i] == jmx[i] - 2) {
|
||||
gp[my_num].pjen[i] = jmx[i]-1;
|
||||
}
|
||||
|
||||
if (gp[my_num].rlist[i] % 2 == 0) {
|
||||
gp[my_num].eist[i] = gp[my_num].rlist[i];
|
||||
gp[my_num].oist[i] = gp[my_num].rlist[i] + 1;
|
||||
} else {
|
||||
gp[my_num].eist[i] = gp[my_num].rlist[i] + 1;
|
||||
gp[my_num].oist[i] = gp[my_num].rlist[i];
|
||||
}
|
||||
if (gp[my_num].rljst[i] % 2 == 0) {
|
||||
gp[my_num].ejst[i] = gp[my_num].rljst[i];
|
||||
gp[my_num].ojst[i] = gp[my_num].rljst[i] + 1;
|
||||
} else {
|
||||
gp[my_num].ejst[i] = gp[my_num].rljst[i] + 1;
|
||||
gp[my_num].ojst[i] = gp[my_num].rljst[i];
|
||||
}
|
||||
if (gp[my_num].rlien[i] == imx[i]-2) {
|
||||
gp[my_num].rlien[i] = gp[my_num].rlien[i] - 1;
|
||||
if (gp[my_num].rlien[i] % 2 == 0) {
|
||||
gp[my_num].ojest[i] = gp[my_num].ojst[i];
|
||||
gp[my_num].ejest[i] = gp[my_num].ejst[i];
|
||||
} else {
|
||||
gp[my_num].ojest[i] = gp[my_num].ejst[i];
|
||||
gp[my_num].ejest[i] = gp[my_num].ojst[i];
|
||||
}
|
||||
}
|
||||
if (gp[my_num].rljen[i] == jmx[i]-2) {
|
||||
gp[my_num].rljen[i] = gp[my_num].rljen[i] - 1;
|
||||
if (gp[my_num].rljen[i] % 2 == 0) {
|
||||
gp[my_num].oiest[i] = gp[my_num].oist[i];
|
||||
gp[my_num].eiest[i] = gp[my_num].eist[i];
|
||||
} else {
|
||||
gp[my_num].oiest[i] = gp[my_num].eist[i];
|
||||
gp[my_num].eiest[i] = gp[my_num].oist[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* initialize constants and variables
|
||||
|
||||
id is a global shared variable that has fetch-and-add operations
|
||||
performed on it by processes to obtain their pids. */
|
||||
|
||||
global->id = 0;
|
||||
global->psibi = 0.0;
|
||||
pi = atan(1.0);
|
||||
pi = 4.*pi;
|
||||
|
||||
factjacob = -1./(12.*res*res);
|
||||
factlap = 1./(res*res);
|
||||
eig2 = -h*f0*f0/(h1*h3*gpr);
|
||||
jmm1 = jm-1 ;
|
||||
ysca = ((double) jmm1)*res ;
|
||||
for (i=0;i<im;i++) {
|
||||
for (j=0;j<jm;j++) {
|
||||
guess->oldga[i][j] = 0.0;
|
||||
guess->oldgb[i][j] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
if (do_output) {
|
||||
printf(" MULTIGRID OUTPUTS\n");
|
||||
}
|
||||
|
||||
CREATE(slave, nprocs);
|
||||
WAIT_FOR_END(nprocs);
|
||||
CLOCK(computeend)
|
||||
|
||||
printf("\n");
|
||||
printf(" PROCESS STATISTICS\n");
|
||||
printf(" Total Multigrid Multigrid\n");
|
||||
printf(" Proc Time Time Fraction\n");
|
||||
printf(" 0 %15.0f %15.0f %10.3f\n", gp[0].total_time,gp[0].multi_time, gp[0].multi_time/gp[0].total_time);
|
||||
|
||||
if (do_stats) {
|
||||
min_total = max_total = avg_total = gp[0].total_time;
|
||||
min_multi = max_multi = avg_multi = gp[0].multi_time;
|
||||
min_frac = max_frac = avg_frac = gp[0].multi_time/gp[0].total_time;
|
||||
for (i=1;i<nprocs;i++) {
|
||||
if (gp[i].total_time > max_total) {
|
||||
max_total = gp[i].total_time;
|
||||
}
|
||||
if (gp[i].total_time < min_total) {
|
||||
min_total = gp[i].total_time;
|
||||
}
|
||||
if (gp[i].multi_time > max_multi) {
|
||||
max_multi = gp[i].multi_time;
|
||||
}
|
||||
if (gp[i].multi_time < min_multi) {
|
||||
min_multi = gp[i].multi_time;
|
||||
}
|
||||
if (gp[i].multi_time/gp[i].total_time > max_frac) {
|
||||
max_frac = gp[i].multi_time/gp[i].total_time;
|
||||
}
|
||||
if (gp[i].multi_time/gp[i].total_time < min_frac) {
|
||||
min_frac = gp[i].multi_time/gp[i].total_time;
|
||||
}
|
||||
avg_total += gp[i].total_time;
|
||||
avg_multi += gp[i].multi_time;
|
||||
avg_frac += gp[i].multi_time/gp[i].total_time;
|
||||
}
|
||||
avg_total = avg_total / nprocs;
|
||||
avg_multi = avg_multi / nprocs;
|
||||
avg_frac = avg_frac / nprocs;
|
||||
for (i=1;i<nprocs;i++) {
|
||||
printf(" %3ld %15.0f %15.0f %10.3f\n", i, gp[i].total_time, gp[i].multi_time, gp[i].multi_time/gp[i].total_time);
|
||||
}
|
||||
printf(" Avg %15.0f %15.0f %10.3f\n", avg_total,avg_multi,avg_frac);
|
||||
printf(" Min %15.0f %15.0f %10.3f\n", min_total,min_multi,min_frac);
|
||||
printf(" Max %15.0f %15.0f %10.3f\n", max_total,max_multi,max_frac);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
global->starttime = start;
|
||||
printf(" TIMING INFORMATION\n");
|
||||
printf("Start time : %16lu\n", global->starttime);
|
||||
printf("Initialization finish time : %16lu\n", global->trackstart);
|
||||
printf("Overall finish time : %16lu\n", computeend);
|
||||
printf("Total time with initialization : %16lu\n", computeend-global->starttime);
|
||||
printf("Total time without initialization : %16lu\n", computeend-global->trackstart);
|
||||
printf(" (excludes first timestep)\n");
|
||||
printf("\n");
|
||||
|
||||
MAIN_END
|
||||
}
|
||||
|
||||
/*
 * Return the base-2 logarithm of number when number is an exact power of
 * two no larger than 2^50, and -1 otherwise (this covers zero and
 * negative input as well).
 *
 * The loop compares against number / 2 rather than doubling first and
 * comparing afterwards, so cumulative never grows past number and the
 * multiplication cannot overflow, even where long is 32 bits wide.
 */
long log_2(long number)
{
  long cumulative = 1;
  long out = 0;

  /* at most 50 doublings are attempted, as in the original code */
  while ((out < 50) && (cumulative <= number / 2)) {
    cumulative = cumulative * 2;
    out++;
  }

  if (cumulative == number) {
    return(out);
  }
  return(-1);
}
|
||||
|
||||
/*
 * Report an error: writes "ERROR: ", the message s, and a newline to
 * stderr.  The program keeps running; callers decide whether to exit.
 */
void printerr(char *s)
{
  FILE *sink = stderr;

  fprintf(sink, "ERROR: %s\n", s);
}
|
451
splash2/codes/apps/ocean/non_contiguous_partitions/multi.C
Normal file
451
splash2/codes/apps/ocean/non_contiguous_partitions/multi.C
Normal file
|
@ -0,0 +1,451 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/* shared memory implementation of the multigrid method
|
||||
implementation uses red-black gauss-seidel relaxation
|
||||
iterations, w cycles, and the method of half-injection for
|
||||
residual computation */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include "decs.h"
|
||||
|
||||
/* perform multigrid (w cycles) */
|
||||
void multig(long my_id)
|
||||
{
|
||||
long iter;
|
||||
double wu;
|
||||
double errp;
|
||||
long m;
|
||||
long minlevel;
|
||||
long flag1;
|
||||
long flag2;
|
||||
long k;
|
||||
long my_num;
|
||||
double wmax;
|
||||
double local_err;
|
||||
double red_local_err;
|
||||
double black_local_err;
|
||||
double g_error;
|
||||
|
||||
flag1 = 0;
|
||||
flag2 = 0;
|
||||
iter = 0;
|
||||
m = numlev-1;
|
||||
wmax = maxwork;
|
||||
minlevel = minlev;
|
||||
my_num = my_id;
|
||||
wu = 0.0;
|
||||
|
||||
k = m;
|
||||
g_error = 1.0e30;
|
||||
while ((!flag1) && (!flag2)) {
|
||||
errp = g_error;
|
||||
iter++;
|
||||
if (my_num == MASTER) {
|
||||
multi->err_multi = 0.0;
|
||||
}
|
||||
|
||||
/* barrier to make sure all procs have finished intadd or rescal */
|
||||
/* before proceeding with relaxation */
|
||||
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->error_barrier,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
relax(k,&red_local_err,RED_ITER,my_num);
|
||||
|
||||
/* barrier to make sure all red computations have been performed */
|
||||
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->error_barrier,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
relax(k,&black_local_err,BLACK_ITER,my_num);
|
||||
|
||||
/* compute max local error from red_local_err and black_local_err */
|
||||
|
||||
if (red_local_err > black_local_err) {
|
||||
local_err = red_local_err;
|
||||
} else {
|
||||
local_err = black_local_err;
|
||||
}
|
||||
|
||||
/* update the global error if necessary */
|
||||
|
||||
LOCK(locks->error_lock)
|
||||
if (local_err > multi->err_multi) {
|
||||
multi->err_multi = local_err;
|
||||
}
|
||||
UNLOCK(locks->error_lock)
|
||||
|
||||
/* a single relaxation sweep at the finest level is one unit of */
|
||||
/* work */
|
||||
|
||||
wu+=pow((double)4.0,(double)k-m);
|
||||
|
||||
/* barrier to make sure all processors have checked local error */
|
||||
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->error_barrier,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
g_error = multi->err_multi;
|
||||
|
||||
/* barrier to make sure master does not cycle back to top of loop */
|
||||
/* and reset global->err before we read it and decide what to do */
|
||||
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->error_barrier,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
if (g_error >= lev_tol[k]) {
|
||||
if (wu > wmax) {
|
||||
/* max work exceeded */
|
||||
flag1 = 1;
|
||||
fprintf(stderr,"ERROR: Maximum work limit %0.5f exceeded\n",wmax);
|
||||
exit(-1);
|
||||
} else {
|
||||
/* if we have not converged */
|
||||
if ((k != 1) && (g_error/errp >= 0.6) && (k > minlevel)) {
|
||||
/* if need to go to coarser grid */
|
||||
rescal(k,my_num);
|
||||
/* transfer residual to rhs of coarser grid */
|
||||
lev_tol[k-1] = 0.3 * g_error;
|
||||
k = k-1;
|
||||
putz(k,my_num);
|
||||
/* make initial guess on coarser grid zero */
|
||||
g_error = 1.0e30;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* if we have converged at this level */
|
||||
if (k == m) {
|
||||
/* if finest grid, we are done */
|
||||
flag2 = 1;
|
||||
} else {
|
||||
/* else go to next finest grid */
|
||||
intadd(k,my_num);
|
||||
k++;
|
||||
g_error = 1.0e30;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (do_output) {
|
||||
if (my_num == MASTER) {
|
||||
printf("iter %ld, level %ld, residual norm %12.8e, work = %7.3f\n", iter,k,multi->err_multi,wu);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* perform red or black iteration (not both) */
|
||||
void relax(long k, double *err, long color, long my_num)
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long iend;
|
||||
long jend;
|
||||
long oddistart;
|
||||
long oddjstart;
|
||||
long evenistart;
|
||||
long evenjstart;
|
||||
long oddiendst;
|
||||
long eveniendst;
|
||||
long oddjendst;
|
||||
long evenjendst;
|
||||
double a;
|
||||
double h;
|
||||
double factor;
|
||||
double maxerr;
|
||||
double newerr;
|
||||
double oldval;
|
||||
double newval;
|
||||
|
||||
i = 0;
|
||||
j = 0;
|
||||
|
||||
*err = 0.0;
|
||||
h = lev_res[k];
|
||||
|
||||
/* points whose sum of row and col index is even do a red iteration, */
|
||||
/* others do a black */
|
||||
|
||||
evenistart = gp[my_num].eist[k];
|
||||
evenjstart = gp[my_num].ejst[k];
|
||||
oddistart = gp[my_num].oist[k];
|
||||
oddjstart = gp[my_num].ojst[k];
|
||||
eveniendst = gp[my_num].eiest[k];
|
||||
evenjendst = gp[my_num].ejest[k];
|
||||
oddiendst = gp[my_num].oiest[k];
|
||||
oddjendst = gp[my_num].ojest[k];
|
||||
|
||||
iend = gp[my_num].rel_start_y[k] + gp[my_num].rel_num_y[k];
|
||||
jend = gp[my_num].rel_start_x[k] + gp[my_num].rel_num_x[k];
|
||||
|
||||
factor = 4.0 - eig2 * h * h ;
|
||||
maxerr = 0.0;
|
||||
if (color == RED_ITER) {
|
||||
for (i=evenistart;i<iend;i+=2) {
|
||||
for (j=evenjstart;j<jend;j+=2) {
|
||||
a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
|
||||
multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
|
||||
multi->rhs_multi[k][i][j] ;
|
||||
oldval = multi->q_multi[k][i][j];
|
||||
newval = a / factor;
|
||||
newerr = oldval - newval;
|
||||
multi->q_multi[k][i][j] = newval;
|
||||
if (fabs(newerr) > maxerr) {
|
||||
maxerr = fabs(newerr);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i=oddistart;i<iend;i+=2) {
|
||||
for (j=oddjstart;j<jend;j+=2) {
|
||||
a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
|
||||
multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
|
||||
multi->rhs_multi[k][i][j] ;
|
||||
oldval = multi->q_multi[k][i][j];
|
||||
newval = a / factor;
|
||||
newerr = oldval - newval;
|
||||
multi->q_multi[k][i][j] = newval;
|
||||
if (fabs(newerr) > maxerr) {
|
||||
maxerr = fabs(newerr);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (color == BLACK_ITER) {
|
||||
for (i=evenistart;i<iend;i+=2) {
|
||||
for (j=oddjstart;j<jend;j+=2) {
|
||||
a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
|
||||
multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
|
||||
multi->rhs_multi[k][i][j] ;
|
||||
oldval = multi->q_multi[k][i][j];
|
||||
newval = a / factor;
|
||||
newerr = oldval - newval;
|
||||
multi->q_multi[k][i][j] = newval;
|
||||
if (fabs(newerr) > maxerr) {
|
||||
maxerr = fabs(newerr);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i=oddistart;i<iend;i+=2) {
|
||||
for (j=evenjstart;j<jend;j+=2) {
|
||||
a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
|
||||
multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
|
||||
multi->rhs_multi[k][i][j] ;
|
||||
oldval = multi->q_multi[k][i][j];
|
||||
newval = a / factor;
|
||||
newerr = oldval - newval;
|
||||
multi->q_multi[k][i][j] = newval;
|
||||
if (fabs(newerr) > maxerr) {
|
||||
maxerr = fabs(newerr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*err = maxerr;
|
||||
}
|
||||
|
||||
/* perform half-injection to next coarsest level */
|
||||
void rescal(long kf, long my_num)
|
||||
{
|
||||
long ic;
|
||||
long if17;
|
||||
long jf;
|
||||
long jc;
|
||||
long krc;
|
||||
long istart;
|
||||
long iend;
|
||||
long jstart;
|
||||
long jend;
|
||||
double hf;
|
||||
double hc;
|
||||
double s;
|
||||
double s1;
|
||||
double s2;
|
||||
double s3;
|
||||
double s4;
|
||||
double factor;
|
||||
double int1;
|
||||
double int2;
|
||||
double i_int_factor;
|
||||
double j_int_factor;
|
||||
double int_val;
|
||||
|
||||
krc = kf - 1;
|
||||
hc = lev_res[krc];
|
||||
hf = lev_res[kf];
|
||||
|
||||
istart = gp[my_num].rlist[krc];
|
||||
jstart = gp[my_num].rljst[krc];
|
||||
iend = gp[my_num].rlien[krc];
|
||||
jend = gp[my_num].rljen[krc];
|
||||
iend = gp[my_num].rel_start_y[krc] + gp[my_num].rel_num_y[krc] - 1;
|
||||
jend = gp[my_num].rel_start_x[krc] + gp[my_num].rel_num_x[krc] - 1;
|
||||
|
||||
factor = 4.0 - eig2 * hf * hf;
|
||||
|
||||
if17=2*(istart-1);
|
||||
for(ic=istart;ic<=iend;ic++) {
|
||||
if17+=2;
|
||||
i_int_factor = ic * i_int_coeff[krc] * 0.5;
|
||||
jf = 2 * (jstart - 1);
|
||||
for(jc=jstart;jc<=jend;jc++) {
|
||||
jf+=2;
|
||||
j_int_factor = jc*j_int_coeff[krc] * 0.5;
|
||||
/* method of half-injection uses 2.0 instead of 4.0 */
|
||||
s = multi->q_multi[kf][if17][jf+1] + multi->q_multi[kf][if17][jf-1] +
|
||||
multi->q_multi[kf][if17-1][jf] + multi->q_multi[kf][if17+1][jf];
|
||||
s1 = 2.0 * (multi->rhs_multi[kf][if17][jf] - s +
|
||||
factor * multi->q_multi[kf][if17][jf]);
|
||||
if ((if17 == 2) || (jf ==2)) {
|
||||
s2 = 0;
|
||||
} else {
|
||||
s = multi->q_multi[kf][if17][jf-1] + multi->q_multi[kf][if17][jf-3] +
|
||||
multi->q_multi[kf][if17-1][jf-2] + multi->q_multi[kf][if17+1][jf-2];
|
||||
s2 = 2.0 * (multi->rhs_multi[kf][if17][jf-2] - s +
|
||||
factor * multi->q_multi[kf][if17][jf-2]);
|
||||
}
|
||||
if ((if17 == 2) || (jf ==2)) {
|
||||
s3 = 0;
|
||||
} else {
|
||||
s = multi->q_multi[kf][if17-2][jf+1] + multi->q_multi[kf][if17-2][jf-1] +
|
||||
multi->q_multi[kf][if17-3][jf] + multi->q_multi[kf][if17-1][jf];
|
||||
s3 = 2.0 * (multi->rhs_multi[kf][if17-2][jf] - s +
|
||||
factor * multi->q_multi[kf][if17-2][jf]);
|
||||
}
|
||||
if ((if17 == 2) || (jf ==2)) {
|
||||
s4 = 0;
|
||||
} else {
|
||||
s = multi->q_multi[kf][if17-2][jf-1] + multi->q_multi[kf][if17-2][jf-3] +
|
||||
multi->q_multi[kf][if17-3][jf-2] + multi->q_multi[kf][if17-1][jf-2];
|
||||
s4 = 2.0 * (multi->rhs_multi[kf][if17-2][jf-2] - s +
|
||||
factor * multi->q_multi[kf][if17-2][jf-2]);
|
||||
}
|
||||
int1 = j_int_factor*s4 + (1.0-j_int_factor)*s3;
|
||||
int2 = j_int_factor*s2 + (1.0-j_int_factor)*s1;
|
||||
int_val = i_int_factor*int1+(1.0-i_int_factor)*int2;
|
||||
multi->rhs_multi[krc][ic][jc] = i_int_factor*int1+(1.0-i_int_factor)*int2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* perform interpolation and addition to next finest grid */
|
||||
void intadd(long kc, long my_num)
|
||||
{
|
||||
long ic;
|
||||
long if17;
|
||||
long jf;
|
||||
long jc;
|
||||
long kf;
|
||||
long istart;
|
||||
long jstart;
|
||||
long iend;
|
||||
long jend;
|
||||
double hc;
|
||||
double hf;
|
||||
long ifine1;
|
||||
long ifine2;
|
||||
long jfine1;
|
||||
long jfine2;
|
||||
double int1;
|
||||
double int2;
|
||||
double i_int_factor1;
|
||||
double j_int_factor1;
|
||||
double i_int_factor2;
|
||||
double j_int_factor2;
|
||||
|
||||
kf = kc + 1;
|
||||
hc = lev_res[kc];
|
||||
hf = lev_res[kf];
|
||||
|
||||
istart = gp[my_num].iist[kc];
|
||||
jstart = gp[my_num].ijst[kc];
|
||||
iend = gp[my_num].iien[kc];
|
||||
jend = gp[my_num].ijen[kc];
|
||||
|
||||
istart = gp[my_num].rel_start_y[kc];
|
||||
jstart = gp[my_num].rel_start_x[kc];
|
||||
iend = gp[my_num].rel_start_y[kc] + gp[my_num].rel_num_y[kc] - 1;
|
||||
jend = gp[my_num].rel_start_x[kc] + gp[my_num].rel_num_x[kc] - 1;
|
||||
if17 = 2*(istart-1);
|
||||
for(ic=istart;ic<=iend;ic++) {
|
||||
|
||||
if17+=2;
|
||||
ifine1 = if17-1;
|
||||
ifine2 = if17;
|
||||
i_int_factor1= ((imx[kc]-2)-(ic-1)) * (i_int_coeff[kf]);
|
||||
i_int_factor2= ic * i_int_coeff[kf];
|
||||
|
||||
jf = 2*(jstart-1);
|
||||
|
||||
for(jc=jstart;jc<=jend;jc++) {
|
||||
jf+=2;
|
||||
jfine1 = jf-1;
|
||||
jfine2 = jf;
|
||||
j_int_factor1= ((jmx[kc]-2)-(jc-1)) * (j_int_coeff[kf]);
|
||||
j_int_factor2= jc * j_int_coeff[kf];
|
||||
|
||||
int1 = j_int_factor1*multi->q_multi[kc][ic][jc-1] +
|
||||
(1.0-j_int_factor1)*multi->q_multi[kc][ic][jc];
|
||||
int2 = j_int_factor1*multi->q_multi[kc][ic-1][jc-1] +
|
||||
(1.0-j_int_factor1)*multi->q_multi[kc][ic-1][jc];
|
||||
multi->q_multi[kf][if17-1][jf-1] += i_int_factor1*int2 +
|
||||
(1.0-i_int_factor1)*int1;
|
||||
int2 = j_int_factor1*multi->q_multi[kc][ic+1][jc-1] +
|
||||
(1.0-j_int_factor1)*multi->q_multi[kc][ic+1][jc];
|
||||
multi->q_multi[kf][if17][jf-1] += i_int_factor2*int2 +
|
||||
(1.0-i_int_factor2)*int1;
|
||||
int1 = j_int_factor2*multi->q_multi[kc][ic][jc+1] +
|
||||
(1.0-j_int_factor2)*multi->q_multi[kc][ic][jc];
|
||||
int2 = j_int_factor2*multi->q_multi[kc][ic-1][jc+1] +
|
||||
(1.0-j_int_factor2)*multi->q_multi[kc][ic-1][jc];
|
||||
multi->q_multi[kf][if17-1][jf] += i_int_factor1*int2 +
|
||||
(1.0-i_int_factor1)*int1;
|
||||
int2 = j_int_factor2*multi->q_multi[kc][ic+1][jc+1] +
|
||||
(1.0-j_int_factor2)*multi->q_multi[kc][ic+1][jc];
|
||||
multi->q_multi[kf][if17][jf] += i_int_factor2*int2 +
|
||||
(1.0-i_int_factor2)*int1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* initialize a grid to zero in parallel */
|
||||
void putz(long k, long my_num)
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long istart;
|
||||
long jstart;
|
||||
long iend;
|
||||
long jend;
|
||||
|
||||
istart = gp[my_num].pist[k];
|
||||
jstart = gp[my_num].pjst[k];
|
||||
iend = gp[my_num].pien[k];
|
||||
jend = gp[my_num].pjen[k];
|
||||
for (i=istart;i<=iend;i++) {
|
||||
for (j=jstart;j<=jend;j++) {
|
||||
multi->q_multi[k][i][j] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
627
splash2/codes/apps/ocean/non_contiguous_partitions/slave1.C
Normal file
627
splash2/codes/apps/ocean/non_contiguous_partitions/slave1.C
Normal file
|
@ -0,0 +1,627 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/* ****************
|
||||
subroutine slave
|
||||
**************** */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "decs.h"
|
||||
|
||||
void slave()
|
||||
{
|
||||
long i;
|
||||
long j;
|
||||
long nstep;
|
||||
long iindex;
|
||||
long iday;
|
||||
double ysca1;
|
||||
double y;
|
||||
double factor;
|
||||
double sintemp;
|
||||
double curlt;
|
||||
double ressqr;
|
||||
long istart;
|
||||
long iend;
|
||||
long jstart;
|
||||
long jend;
|
||||
long ist;
|
||||
long ien;
|
||||
long jst;
|
||||
long jen;
|
||||
double fac;
|
||||
long dayflag=0;
|
||||
long dhourflag=0;
|
||||
long endflag=0;
|
||||
double ttime;
|
||||
double dhour;
|
||||
double day;
|
||||
long firstrow;
|
||||
long lastrow;
|
||||
long numrows;
|
||||
long firstcol;
|
||||
long lastcol;
|
||||
long numcols;
|
||||
long psiindex;
|
||||
double psibipriv;
|
||||
long psinum;
|
||||
long procid;
|
||||
unsigned long t1;
|
||||
|
||||
ressqr = lev_res[numlev-1] * lev_res[numlev-1];
|
||||
|
||||
LOCK(locks->idlock)
|
||||
procid = global->id;
|
||||
global->id = global->id+1;
|
||||
UNLOCK(locks->idlock)
|
||||
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
|
||||
processors to avoid migration. */
|
||||
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might distribute
|
||||
data structures across physically distributed memories in
|
||||
a round-robin fashion. */
|
||||
|
||||
firstcol = gp[procid].rel_start_x[numlev-1];
|
||||
lastcol = firstcol + gp[procid].rel_num_x[numlev-1] - 1;
|
||||
firstrow = gp[procid].rel_start_y[numlev-1];
|
||||
lastrow = firstrow + gp[procid].rel_num_y[numlev-1] - 1;
|
||||
numcols = gp[procid].rel_num_x[numlev-1];
|
||||
numrows = gp[procid].rel_num_y[numlev-1];
|
||||
|
||||
if (procid > nprocs/2) {
|
||||
psinum = 2;
|
||||
} else {
|
||||
psinum = 1;
|
||||
}
|
||||
|
||||
/* every process gets its own copy of the timing variables to avoid
|
||||
contention at shared memory locations. here, these variables
|
||||
are initialized. */
|
||||
|
||||
ttime = 0.0;
|
||||
dhour = 0.0;
|
||||
nstep = 0 ;
|
||||
day = 0.0;
|
||||
|
||||
ysca1 = 0.5*ysca;
|
||||
if (procid == MASTER) {
|
||||
for(iindex = 0;iindex<=jm-1;iindex++) {
|
||||
y = ((double) iindex)*res;
|
||||
wrk2->f[iindex] = f0+beta*(y-ysca1);
|
||||
}
|
||||
}
|
||||
|
||||
if (procid == MASTER) {
|
||||
fields2->psium[0][0]=0.0;
|
||||
}
|
||||
if (procid == nprocs-xprocs) {
|
||||
fields2->psium[im-1][0]=0.0;
|
||||
}
|
||||
if (procid == xprocs-1) {
|
||||
fields2->psium[0][jm-1]=0.0;
|
||||
}
|
||||
if (procid == nprocs-1) {
|
||||
fields2->psium[im-1][jm-1]=0.0;
|
||||
}
|
||||
if (firstrow == 1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
fields2->psium[0][j] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstrow+numrows) == im-1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
fields2->psium[im-1][j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (firstcol == 1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
fields2->psium[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstcol+numcols) == jm-1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
fields2->psium[j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
fields2->psium[i][iindex] = 0.0;
|
||||
}
|
||||
}
|
||||
if (procid == MASTER) {
|
||||
fields2->psilm[0][0]=0.0;
|
||||
}
|
||||
if (procid == nprocs-xprocs) {
|
||||
fields2->psilm[im-1][0]=0.0;
|
||||
}
|
||||
if (procid == xprocs-1) {
|
||||
fields2->psilm[0][jm-1]=0.0;
|
||||
}
|
||||
if (procid == nprocs-1) {
|
||||
fields2->psilm[im-1][jm-1]=0.0;
|
||||
}
|
||||
if (firstrow == 1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
fields2->psilm[0][j] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstrow+numrows) == im-1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
fields2->psilm[im-1][j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (firstcol == 1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
fields2->psilm[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstcol+numcols) == jm-1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
fields2->psilm[j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
fields2->psilm[i][iindex] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
if (procid == MASTER) {
|
||||
wrk1->psib[0][0]=1.0;
|
||||
}
|
||||
if (procid == xprocs-1) {
|
||||
wrk1->psib[0][jm-1]=1.0;
|
||||
}
|
||||
if (procid == nprocs-xprocs) {
|
||||
wrk1->psib[im-1][0]=1.0;
|
||||
}
|
||||
if (procid == nprocs-1) {
|
||||
wrk1->psib[im-1][jm-1]=1.0;
|
||||
}
|
||||
if (firstrow == 1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
wrk1->psib[0][j] = 1.0;
|
||||
}
|
||||
}
|
||||
if ((firstrow+numrows) == im-1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
wrk1->psib[im-1][j] = 1.0;
|
||||
}
|
||||
}
|
||||
if (firstcol == 1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
wrk1->psib[j][0] = 1.0;
|
||||
}
|
||||
}
|
||||
if ((firstcol+numcols) == jm-1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
wrk1->psib[j][jm-1] = 1.0;
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
wrk1->psib[i][iindex] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/* wait until all processes have completed the above initialization */
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->sl_prini,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
istart = gp[procid].rel_start_y[numlev-1];
|
||||
iend = istart + gp[procid].rel_num_y[numlev-1] - 1;
|
||||
jstart = gp[procid].rel_start_x[numlev-1];
|
||||
jend = jstart + gp[procid].rel_num_x[numlev-1] - 1;
|
||||
ist = istart;
|
||||
ien = iend;
|
||||
jst = jstart;
|
||||
jen = jend;
|
||||
if (istart == 1) {
|
||||
istart = 0;
|
||||
}
|
||||
if (jstart == 1) {
|
||||
jstart = 0;
|
||||
}
|
||||
if (iend == im-2) {
|
||||
iend = im-1;
|
||||
}
|
||||
if (jend == jm-2) {
|
||||
jend = jm-1;
|
||||
}
|
||||
for(i=istart;i<=iend;i++) {
|
||||
for(j=jstart;j<=jend;j++) {
|
||||
multi->rhs_multi[numlev-1][i][j] = wrk1->psib[i][j] * ressqr;
|
||||
}
|
||||
}
|
||||
if (istart == 0) {
|
||||
for(j=jstart;j<=jend;j++) {
|
||||
multi->q_multi[numlev-1][0][j] = wrk1->psib[0][j];
|
||||
}
|
||||
}
|
||||
if (iend == im-1) {
|
||||
for(j=jstart;j<=jend;j++) {
|
||||
multi->q_multi[numlev-1][im-1][j] = wrk1->psib[im-1][j];
|
||||
}
|
||||
}
|
||||
if (jstart == 0) {
|
||||
for(i=istart;i<=iend;i++) {
|
||||
multi->q_multi[numlev-1][i][0] = wrk1->psib[i][0];
|
||||
}
|
||||
}
|
||||
if (jend == jm-1) {
|
||||
for(i=istart;i<=iend;i++) {
|
||||
multi->q_multi[numlev-1][i][jm-1] = wrk1->psib[i][jm-1];
|
||||
}
|
||||
}
|
||||
|
||||
fac = 1.0 / (4.0 - ressqr*eig2);
|
||||
for(i=ist;i<=ien;i++) {
|
||||
for(j=jst;j<=jen;j++) {
|
||||
multi->q_multi[numlev-1][i][j] = fac * (wrk1->psib[i+1][j] +
|
||||
wrk1->psib[i-1][j] + wrk1->psib[i][j+1] + wrk1->psib[i][j-1] -
|
||||
ressqr*wrk1->psib[i][j]);
|
||||
}
|
||||
}
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->sl_prini,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
multig(procid);
|
||||
|
||||
for(i=istart;i<=iend;i++) {
|
||||
for(j=jstart;j<=jend;j++) {
|
||||
wrk1->psib[i][j] = multi->q_multi[numlev-1][i][j];
|
||||
}
|
||||
}
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->sl_psini,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
/* update the local running sum psibipriv by summing all the resulting
|
||||
values in that process's share of the psib matrix */
|
||||
|
||||
psibipriv=0.0;
|
||||
if (procid == MASTER) {
|
||||
psibipriv = psibipriv + 0.25*(wrk1->psib[0][0]);
|
||||
}
|
||||
if (procid == xprocs-1){
|
||||
psibipriv = psibipriv + 0.25*(wrk1->psib[0][jm-1]);
|
||||
}
|
||||
if (procid == nprocs - xprocs) {
|
||||
psibipriv=psibipriv+0.25*(wrk1->psib[im-1][0]);
|
||||
}
|
||||
if (procid == nprocs-1) {
|
||||
psibipriv=psibipriv+0.25*(wrk1->psib[im-1][jm-1]);
|
||||
}
|
||||
if (firstrow == 1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
psibipriv = psibipriv + 0.5*wrk1->psib[0][j];
|
||||
}
|
||||
}
|
||||
if ((firstrow+numrows) == im-1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
psibipriv = psibipriv + 0.5*wrk1->psib[im-1][j];
|
||||
}
|
||||
}
|
||||
if (firstcol == 1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
psibipriv = psibipriv + 0.5*wrk1->psib[j][0];
|
||||
}
|
||||
}
|
||||
if ((firstcol+numcols) == jm-1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
psibipriv = psibipriv + 0.5*wrk1->psib[j][jm-1];
|
||||
}
|
||||
}
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
psibipriv = psibipriv + wrk1->psib[i][iindex];
|
||||
}
|
||||
}
|
||||
|
||||
/* update the shared variable psibi by summing all the psibiprivs
|
||||
of the individual processes into it. note that this combined
|
||||
private and shared sum method avoids accessing the shared
|
||||
variable psibi once for every element of the matrix. */
|
||||
|
||||
LOCK(locks->psibilock)
|
||||
global->psibi = global->psibi + psibipriv;
|
||||
UNLOCK(locks->psibilock)
|
||||
|
||||
for(psiindex=0;psiindex<=1;psiindex++) {
|
||||
if (procid == MASTER) {
|
||||
fields->psim[psiindex][0][0] = 0.0;
|
||||
}
|
||||
if (procid == nprocs-xprocs) {
|
||||
fields->psim[psiindex][im-1][0] = 0.0;
|
||||
}
|
||||
if (procid == xprocs-1) {
|
||||
fields->psim[psiindex][0][jm-1] = 0.0;
|
||||
}
|
||||
if (procid == nprocs-1) {
|
||||
fields->psim[psiindex][im-1][jm-1] = 0.0;
|
||||
}
|
||||
if (firstrow == 1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
fields->psim[psiindex][0][j] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstrow+numrows) == im-1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
fields->psim[psiindex][im-1][j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (firstcol == 1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
fields->psim[psiindex][j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstcol+numcols) == jm-1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
fields->psim[psiindex][j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
fields->psim[psiindex][i][iindex] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* initialize psi matrices the same way */
|
||||
|
||||
for(psiindex=0;psiindex<=1;psiindex++) {
|
||||
if (procid == MASTER) {
|
||||
fields->psi[psiindex][0][0] = 0.0;
|
||||
}
|
||||
if (procid == xprocs-1) {
|
||||
fields->psi[psiindex][0][jm-1] = 0.0;
|
||||
}
|
||||
if (procid == nprocs-xprocs) {
|
||||
fields->psi[psiindex][im-1][0] = 0.0;
|
||||
}
|
||||
if (procid == nprocs-1) {
|
||||
fields->psi[psiindex][im-1][jm-1] = 0.0;
|
||||
}
|
||||
if (firstrow == 1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
fields->psi[psiindex][0][j] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstrow+numrows) == im-1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
fields->psi[psiindex][im-1][j] = 0.0;
|
||||
}
|
||||
}
|
||||
if (firstcol == 1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
fields->psi[psiindex][j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstcol+numcols) == jm-1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
fields->psi[psiindex][j][jm-1] = 0.0;
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
fields->psi[psiindex][i][iindex] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* compute input curl of wind stress */
|
||||
|
||||
ysca1 = .5*ysca;
|
||||
factor= -t0*pi/ysca1;
|
||||
if (procid == MASTER) {
|
||||
frcng->tauz[0][0] = 0.0;
|
||||
}
|
||||
if (procid == nprocs-xprocs) {
|
||||
frcng->tauz[im-1][0] = 0.0;
|
||||
}
|
||||
if (procid == xprocs-1) {
|
||||
sintemp = pi*((double) jmm1)*res/ysca1;
|
||||
sintemp = sin(sintemp);
|
||||
frcng->tauz[0][jm-1] = factor*sintemp;
|
||||
}
|
||||
if (procid == nprocs-1) {
|
||||
sintemp = pi*((double) jmm1)*res/ysca1;
|
||||
sintemp = sin(sintemp);
|
||||
frcng->tauz[im-1][jm-1] = frcng->tauz[0][jm-1];
|
||||
}
|
||||
if (firstrow == 1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
sintemp = pi*((double) j)*res/ysca1;
|
||||
sintemp = sin(sintemp);
|
||||
curlt = factor*sintemp;
|
||||
frcng->tauz[0][j] = curlt;
|
||||
}
|
||||
}
|
||||
if ((firstrow+numrows) == im-1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
sintemp = pi*((double) j)*res/ysca1;
|
||||
sintemp = sin(sintemp);
|
||||
curlt = factor*sintemp;
|
||||
frcng->tauz[im-1][j] = curlt;
|
||||
}
|
||||
}
|
||||
if (firstcol == 1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
frcng->tauz[j][0] = 0.0;
|
||||
}
|
||||
}
|
||||
if ((firstcol+numcols) == jm-1) {
|
||||
sintemp = pi*((double) jmm1)*res/ysca1;
|
||||
sintemp = sin(sintemp);
|
||||
curlt = factor*sintemp;
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
frcng->tauz[j][jm-1] = curlt;
|
||||
}
|
||||
}
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
sintemp = pi*((double) iindex)*res/ysca1;
|
||||
sintemp = sin(sintemp);
|
||||
curlt = factor*sintemp;
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
frcng->tauz[i][iindex] = curlt;
|
||||
}
|
||||
}
|
||||
#if defined(MULTIPLE_BARRIERS)
|
||||
BARRIER(bars->sl_onetime,nprocs)
|
||||
#else
|
||||
BARRIER(bars->barrier,nprocs)
|
||||
#endif
|
||||
|
||||
/***************************************************************
|
||||
one-time stuff over at this point
|
||||
***************************************************************/
|
||||
|
||||
while (!endflag) {
|
||||
while ((!dayflag) || (!dhourflag)) {
|
||||
dayflag = 0;
|
||||
dhourflag = 0;
|
||||
if (nstep == 1) {
|
||||
if (procid == MASTER) {
|
||||
CLOCK(global->trackstart)
|
||||
}
|
||||
if ((procid == MASTER) || (do_stats)) {
|
||||
CLOCK(t1);
|
||||
gp[procid].total_time = t1;
|
||||
gp[procid].multi_time = 0;
|
||||
}
|
||||
/* POSSIBLE ENHANCEMENT: Here is where one might reset the
|
||||
statistics that one is measuring about the parallel execution */
|
||||
}
|
||||
|
||||
slave2(procid,firstrow,lastrow,numrows,firstcol,lastcol,numcols);
|
||||
|
||||
/* update time and step number
|
||||
note that these time and step variables are private i.e. every
|
||||
process has its own copy and keeps track of its own time */
|
||||
|
||||
ttime = ttime + dtau;
|
||||
nstep = nstep + 1;
|
||||
day = ttime/86400.0;
|
||||
if (day > ((double) outday0)) {
|
||||
dayflag = 1;
|
||||
iday = (long) day;
|
||||
dhour = dhour+dtau;
|
||||
if (dhour >= 86400.0) {
|
||||
dhourflag = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
dhour = 0.0;
|
||||
|
||||
/* update values of psium array to psium + psim{1} */
|
||||
|
||||
if (procid == MASTER) {
|
||||
fields2->psium[0][0] = fields2->psium[0][0]+fields->psim[0][0][0];
|
||||
}
|
||||
if (procid == nprocs-xprocs) {
|
||||
fields2->psium[im-1][0] = fields2->psium[im-1][0]+fields->psim[0][im-1][0];
|
||||
}
|
||||
if (procid == xprocs-1) {
|
||||
fields2->psium[0][jm-1] = fields2->psium[0][jm-1]+fields->psim[0][0][jm-1];
|
||||
}
|
||||
if (procid == nprocs-1) {
|
||||
fields2->psium[im-1][jm-1] = fields2->psium[im-1][jm-1]+fields->psim[0][im-1][jm-1];
|
||||
}
|
||||
if (firstrow == 1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
fields2->psium[0][j] = fields2->psium[0][j]+fields->psim[0][0][j];
|
||||
}
|
||||
}
|
||||
if ((firstrow+numrows) == im-1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
fields2->psium[im-1][j] = fields2->psium[im-1][j]+fields->psim[0][im-1][j];
|
||||
}
|
||||
}
|
||||
if (firstcol == 1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
fields2->psium[j][0] = fields2->psium[j][0]+fields->psim[0][j][0];
|
||||
}
|
||||
}
|
||||
if ((firstcol+numcols) == jm-1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
fields2->psium[j][jm-1] = fields2->psium[j][jm-1]+fields->psim[0][j][jm-1];
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
fields2->psium[i][iindex] = fields2->psium[i][iindex]+fields->psim[0][i][iindex];
|
||||
}
|
||||
}
|
||||
|
||||
/* update values of psilm array to psilm + psim[2] */
|
||||
|
||||
if (procid == MASTER) {
|
||||
fields2->psilm[0][0] = fields2->psilm[0][0]+fields->psim[1][0][0];
|
||||
}
|
||||
if (procid == nprocs-xprocs) {
|
||||
fields2->psilm[im-1][0] = fields2->psilm[im-1][0]+fields->psim[1][im-1][0];
|
||||
}
|
||||
if (procid == xprocs-1) {
|
||||
fields2->psilm[0][jm-1] = fields2->psilm[0][jm-1]+fields->psim[1][0][jm-1];
|
||||
}
|
||||
if (procid == nprocs-1) {
|
||||
fields2->psilm[im-1][jm-1] = fields2->psilm[im-1][jm-1]+fields->psim[1][im-1][jm-1];
|
||||
}
|
||||
if (firstrow == 1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
fields2->psilm[0][j] = fields2->psilm[0][j]+fields->psim[1][0][j];
|
||||
}
|
||||
}
|
||||
if ((firstrow+numrows) == im-1) {
|
||||
for(j=firstcol;j<=lastcol;j++) {
|
||||
fields2->psilm[im-1][j] = fields2->psilm[im-1][j]+fields->psim[1][im-1][j];
|
||||
}
|
||||
}
|
||||
if (firstcol == 1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
fields2->psilm[j][0] = fields2->psilm[j][0]+fields->psim[1][j][0];
|
||||
}
|
||||
}
|
||||
if ((firstcol+numcols) == jm-1) {
|
||||
for(j=firstrow;j<=lastrow;j++) {
|
||||
fields2->psilm[j][jm-1] = fields2->psilm[j][jm-1]+fields->psim[1][j][jm-1];
|
||||
}
|
||||
}
|
||||
for(i=firstrow;i<=lastrow;i++) {
|
||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
||||
fields2->psilm[i][iindex] = fields2->psilm[i][iindex]+fields->psim[1][i][iindex];
|
||||
}
|
||||
}
|
||||
if (iday >= (long) outday3) {
|
||||
endflag = 1;
|
||||
}
|
||||
}
|
||||
if ((procid == MASTER) || (do_stats)) {
|
||||
CLOCK(t1);
|
||||
gp[procid].total_time = t1-gp[procid].total_time;
|
||||
}
|
||||
}
|
1006
splash2/codes/apps/ocean/non_contiguous_partitions/slave2.C
Normal file
1006
splash2/codes/apps/ocean/non_contiguous_partitions/slave2.C
Normal file
File diff suppressed because it is too large
Load diff
31
splash2/codes/apps/radiosity/Makefile
Normal file
31
splash2/codes/apps/radiosity/Makefile
Normal file
|
@ -0,0 +1,31 @@
|
|||
# Build description for the RADIOSITY application.
TARGET = RADIOSITY

# Object files that are linked into $(TARGET).
OBJS = display.o elemman.o modelman.o patchman.o rad_main.o rad_tools.o room_model.o smallobj.o taskman.o visible.o
# .H header sources; not referenced by any rule in this file.
HHS = model.H parallel.H patch.H radiosity.H task.H
# .h headers that every .c file listed below depends on.
HS = model.h parallel.h patch.h radiosity.h task.h

# NOTE(review): presumably supplies CC, CFLAGS, LDFLAGS and the rules that
# turn .C/.H into .c/.h -- confirm in Makefile.config.
include ../../Makefile.config

# Append the header search paths of the two bundled graphics libraries.
CFLAGS := $(CFLAGS) -I./glibdumb -I./glibps

# Disabled alternative flag settings.
#CCOPTS = -I -float -I/usr/include $(CFLAGS)
#LDFLAGS = -lm -lpthread
#CFLAGS = $(CINCLUDE) -O4 -march=i686 -mcpu=i686 -malign-double -funroll-loops -finline-functions -fomit-frame-pointer -D_REENTRANT
#CINCLUDE = -I$(GLIBDIR) -I./glibps

# Static libraries linked into the executable.
LOADLIBES = glibdumb/glib.a glibps/glibps.a

# Link step: all objects plus the two graphics libraries.
$(TARGET): $(OBJS)
	$(CC) $(OBJS) $(CFLAGS) $(LOADLIBES) -o $(TARGET) $(LDFLAGS)

# radiosity.h pulls in the other four headers.
radiosity.h: patch.h model.h parallel.h task.h
# Each .c file depends on its .C source and on all .h headers.
rad_main.c: rad_main.C $(HS)
patchman.c: patchman.C $(HS)
elemman.c: elemman.C $(HS)
taskman.c: taskman.C $(HS)
modelman.c: modelman.C $(HS)
smallobj.c: smallobj.C $(HS)
display.c: display.C $(HS)
visible.c: visible.C $(HS)
rad_tools.c: rad_tools.C $(HS)
room_model.c: room_model.C $(HS)
|
107
splash2/codes/apps/radiosity/README.radiosity
Normal file
107
splash2/codes/apps/radiosity/README.radiosity
Normal file
|
@ -0,0 +1,107 @@
|
|||
GENERAL INFORMATION:
|
||||
|
||||
This code computes the equilibrium distribution of light in a scene
|
||||
using the hierarchical diffuse radiosity method. A description of the
|
||||
sequential hierarchical radiosity method can be found in
|
||||
|
||||
Pat Hanrahan, David Salzman and Larry Aupperle, "A Rapid Hierarchical
|
||||
Radiosity Algorithm", Proc. SIGGRAPH 1991.
|
||||
|
||||
Descriptions of the parallel algorithm can be found in
|
||||
|
||||
Jaswinder Pal Singh, Anoop Gupta and Marc Levoy, "Parallel
|
||||
Visualization Algorithms: Performance and Architectural Implications",
|
||||
IEEE Computer, July 1994.
|
||||
|
||||
or in
|
||||
|
||||
Jaswinder Pal Singh, et al, "Load Balancing and Data Locality in
|
||||
Hierarchical N-body Methods", Stanford Univ. Tech Report CSL-TR-92-505
|
||||
(to appear in JPDC).
|
||||
|
||||
A detailed description will also be in the SPLASH-2 report.
|
||||
|
||||
The parallelism is managed with distributed task queues and task
|
||||
stealing, and there is one task queue per processor.
|
||||
|
||||
RUNNING THE PROGRAM:
|
||||
|
||||
To see how to run the program, please see the comment at the top
|
||||
of the rad_main.C file or run it as "RADIOSITY -h". Many of the
|
||||
command-line options control the accuracy of the program's computation
|
||||
and have default values set in the program (which can be overridden by
|
||||
command-line specifications). These are indicated as such in the
|
||||
comments or usage statement, and we recommend that they be left at
|
||||
their default values for base SPLASH-2 runs.
|
||||
|
||||
The program runs in different modes, either interactive (using the SGI
|
||||
GL library) or batch. The batch mode does not attempt to display the
|
||||
rendered image, while the interactive mode brings up a GL window with
|
||||
knobs and dials to set parameters and run the program. If you are
|
||||
running on a system that does not support GL or you do not want to
|
||||
display the resulting scene, please use the -batch option on the
|
||||
command line. The makefile shows you how to link the GL libraries.
|
||||
This sample makefile is for a machine on which we do not want to
|
||||
display, in which case we use the glibdumb and glibps libraries, which
|
||||
do not support displaying and are provided with the code. The only
|
||||
real way to verify the correctness of the program is to view the
|
||||
result using GL. The commented lines in the print_statistics routine
|
||||
can be uncommented to print several runtime statistics, but these are
|
||||
nondeterministic in parallel since the program does not follow a
|
||||
deterministic execution path and does not arrive at exactly the same
|
||||
result in different parallel runs (the radiosity algorithm is
|
||||
iterative to convergence, and even the path is nondeterministic).
|
||||
|
||||
The way the program is written, it compiles in the input description
|
||||
of the scene in polygon coordinates. This is in the file room_model.C,
|
||||
which contains the descriptions of two scenes. One is the room scene
|
||||
originally used by Hanrahan et al in their SIGGRAPH paper, and the
|
||||
other is an artificial extension of this scene (removing a wall of the
|
||||
room and introducing some of the features of this room in the
|
||||
neighboring room thus created). To use the former, use the -room
|
||||
command line option; for the latter, use -largeroom. -room is what we
|
||||
call the base SPLASH problem. If you don't specify -room or
|
||||
-largeroom, the program will default to a small dummy test scene which
|
||||
is useful only for debugging the program and verifying that it works.
|
||||
|
||||
There are two types of compile-time flags that can be set in the code.
|
||||
One controls the manner in which patches are assigned to processors at
|
||||
the beginning of each time-step in the radiosity iteration. The simple
|
||||
case is to assign the same statically chosen set of patches to the
|
||||
same processor in every iteration (and rely completely on task
|
||||
stealing for load balancing); this is what happens by default, or if
|
||||
-DPATCH_ASSIGNMENT_STATIC is used as a compile-time flag. The more
|
||||
sophisticated case is to do an assignment at the beginning of a time
|
||||
step based on the costs of patches profiled in the previous time step;
|
||||
this is what happens if the -DPATCH_ASSIGNMENT_COSTBASED flag is used
|
||||
at compile-time. The latter can yield less stealing, but uses more
|
||||
synchronization to keep track of costs and hence has more overhead.
|
||||
We recommend using the default (not defining anything, so static is
|
||||
used) in the base SPLASH runs.
|
||||
|
||||
The other type of compile-time flag special-cases some machines in
|
||||
terms of maximum number of processors etc. (grep for #if in *.C and
|
||||
*.H to find these) and even memory consistency model in one instance.
|
||||
|
||||
BASE PROBLEM SIZE:
|
||||
|
||||
The base problem size we recommend is to use the program as follows:
|
||||
|
||||
RADIOSITY -p ? -batch -room
|
||||
|
||||
where ? is the number of processors. This sets the ae, bf etc flags
|
||||
(see comment at top of rad_main.C or run "RADIOSITY -h" to see what
|
||||
these are) to their default values. The default values can be found
|
||||
by looking at the comment at the top of rad_main.C or running
|
||||
"RADIOSITY -h".
|
||||
|
||||
DATA DISTRIBUTION:
|
||||
|
||||
Data distribution is very difficult in this code, except for some
|
||||
per-process data structures (typically arrays of structures indexed by
|
||||
process_id; grep for MAX_PROCESSORS to find them).
|
||||
General data distribution of other data (e.g., the scene), however, does not
|
||||
make much difference to performance on the Stanford DASH
|
||||
multiprocessor.
|
||||
|
||||
|
628
splash2/codes/apps/radiosity/display.C
Normal file
628
splash2/codes/apps/radiosity/display.C
Normal file
|
@ -0,0 +1,628 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
EXTERN_ENV;
|
||||
|
||||
include(radiosity.h)
|
||||
|
||||
static void add_radiosity_to_vertex(Edge *edge, long reverse, Element *elem, Vertex *p_c, long process_id);
|
||||
static void _display_shaded_triangle(ElemVertex *ev1, ElemVertex *ev2, ElemVertex *ev3, Edge *e12, Edge *e23, Edge *e31, long process_id);
|
||||
static void _disp_interactions(Element *elem, Interaction *inter, long mode, long process_id);
|
||||
static void _ps_disp_interactions(Element *elem, Interaction *inter, long mode, long process_id);
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* radiosity_averaging
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/*
 * radiosity_averaging
 *
 * A non-leaf element only creates one averaging task per child (center,
 * top, right, left).  For a leaf element the work depends on mode:
 *   - RAD_AVERAGING_MODE: visit the leaf edges of e12, e23 and e31 and let
 *     add_radiosity_to_vertex accumulate this element into the vertices;
 *   - any other mode: for each of the three vertices, divide the colour by
 *     the accumulated weight and reset the weight to 1.0, holding the
 *     per-vertex lock while doing so.
 */
void radiosity_averaging(Element *elem, long mode, long process_id)
{
    ElemVertex *corner[3];
    Vertex centroid;
    float scale;
    long flip;
    long k;

    if (!LEAF_ELEMENT(elem)) {
        /* Interior node: hand the four children to the task queue. */
        create_radavg_task(elem->center, mode, process_id);
        create_radavg_task(elem->top, mode, process_id);
        create_radavg_task(elem->right, mode, process_id);
        create_radavg_task(elem->left, mode, process_id);
        return;
    }

    if (mode == RAD_AVERAGING_MODE) {
        /* Center point of the triangle, passed on to the edge callback. */
        center_point(&elem->ev1->p, &elem->ev2->p, &elem->ev3->p, &centroid);

        flip = EDGE_REVERSE(elem->e12, elem->ev1, elem->ev2);
        foreach_leaf_edge(elem->e12, flip, add_radiosity_to_vertex, (long)elem, (long)&centroid, process_id);

        flip = EDGE_REVERSE(elem->e23, elem->ev2, elem->ev3);
        foreach_leaf_edge(elem->e23, flip, add_radiosity_to_vertex, (long)elem, (long)&centroid, process_id);

        flip = EDGE_REVERSE(elem->e31, elem->ev3, elem->ev1);
        foreach_leaf_edge(elem->e31, flip, add_radiosity_to_vertex, (long)elem, (long)&centroid, process_id);
        return;
    }

    /* Normalizing pass: same treatment for ev1, ev2 and ev3, in that order. */
    corner[0] = elem->ev1;
    corner[1] = elem->ev2;
    corner[2] = elem->ev3;

    for (k = 0; k < 3; k++) {
        LOCK(corner[k]->ev_lock->lock);
        if (corner[k]->weight != 1.0) {
            scale = (float)1.0 / corner[k]->weight;
            corner[k]->col.r *= scale;
            corner[k]->col.g *= scale;
            corner[k]->col.b *= scale;
            corner[k]->weight = 1.0;
        }
        UNLOCK(corner[k]->ev_lock->lock);
    }
}
|
||||
|
||||
/*
 * add_radiosity_to_vertex
 *
 * Leaf-edge callback used by radiosity_averaging.  Adds the radiosity of
 * elem, weighted by the element area, to the colour of one end point of
 * edge and increases the weight of that vertex by the same amount.  The
 * end point is edge->pb when reverse is non-zero and edge->pa otherwise.
 * The vertex is updated while holding its per-vertex lock.
 *
 * p_c and process_id are not used here; they stay in the parameter list
 * so the function keeps the signature of its forward declaration and of
 * the call sites that pass it as a callback.
 */
static void add_radiosity_to_vertex(Edge *edge, long reverse, Element *elem, Vertex *p_c, long process_id)
{
    ElemVertex *ev;
    float weight;

    ev = reverse ? edge->pb : edge->pa;

    /*
     * Area weighting.  Two earlier assignments to weight (an inverse
     * distance to p_c and a constant 1.0) were always overwritten before
     * use and have been dropped.
     */
    weight = elem->area;

    LOCK(ev->ev_lock->lock);
    ev->col.r += (elem->rad.r * weight);
    ev->col.g += (elem->rad.g * weight);
    ev->col.b += (elem->rad.b * weight);
    ev->weight += weight;
    UNLOCK(ev->ev_lock->lock);
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* setup_view()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
Vertex view_vec ; /* Origin to the viewer */
|
||||
static float view_rot_x, view_rot_y, view_dist, view_zoom ;
|
||||
|
||||
|
||||
void setup_view(float rot_x, float rot_y, float dist, float zoom, long process_id)
|
||||
{
|
||||
Vertex v1, v2 ;
|
||||
float cc, ss ;
|
||||
|
||||
/* Save parameters */
|
||||
view_rot_x = rot_x ;
|
||||
view_rot_y = rot_y ;
|
||||
view_dist = dist ;
|
||||
view_zoom = zoom ;
|
||||
|
||||
/* Compute view vector */
|
||||
v1.x = 0.0 ;
|
||||
v1.y = 0.0 ;
|
||||
v1.z = 1.0 ;
|
||||
|
||||
/* Rotate view vector */
|
||||
cc = cos( -rot_x * (M_PI / 180.0) ) ;
|
||||
ss = sin( -rot_x * (M_PI / 180.0) ) ;
|
||||
v2.x = v1.x ;
|
||||
v2.y = cc * v1.y - ss * v1.z ;
|
||||
v2.z = ss * v1.y + cc * v1.z ;
|
||||
|
||||
cc = cos( -rot_y * (M_PI / 180.0) ) ;
|
||||
ss = sin( -rot_y * (M_PI / 180.0) ) ;
|
||||
v1.z = cc * v2.z - ss * v2.x ;
|
||||
v1.x = ss * v2.z + cc * v2.x ;
|
||||
v1.y = v2.y ;
|
||||
|
||||
/* Store view vector */
|
||||
view_vec = v1 ;
|
||||
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* display_scene()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
void display_scene(long fill_sw, long patch_sw, long mesh_sw, long interaction_sw, long process_id)
|
||||
{
|
||||
/* Clear the screen */
|
||||
g_clear() ;
|
||||
|
||||
/* Set matrix */
|
||||
g_setup_view( view_rot_x, view_rot_y, view_dist, view_zoom ) ;
|
||||
|
||||
if( fill_sw == 2 )
|
||||
{
|
||||
/* Fill surfaces */
|
||||
display_elements_in_bsp_tree( DISPLAY_SHADED, process_id ) ;
|
||||
}
|
||||
if( fill_sw == 1 )
|
||||
{
|
||||
/* Fill surfaces */
|
||||
display_elements_in_bsp_tree( DISPLAY_FILLED, process_id ) ;
|
||||
}
|
||||
if( mesh_sw )
|
||||
{
|
||||
/* Draw mesh */
|
||||
g_color( G_BLUE ) ;
|
||||
display_elements_in_bsp_tree( DISPLAY_EDGEONLY, process_id ) ;
|
||||
}
|
||||
if( patch_sw )
|
||||
{
|
||||
g_color( G_RED ) ;
|
||||
display_patches_in_bsp_tree( DISPLAY_EDGEONLY, process_id ) ;
|
||||
}
|
||||
if( interaction_sw )
|
||||
{
|
||||
g_color( G_GREEN ) ;
|
||||
display_interactions_in_bsp_tree(process_id) ;
|
||||
}
|
||||
|
||||
/* Flush */
|
||||
g_flush() ;
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* display_patch()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/*
 * display_patch
 *
 * Draws one patch triangle.  In DISPLAY_SHADED and DISPLAY_FILLED mode the
 * patch is skipped when the inner product of its plane normal and view_vec
 * is below F_ZERO; otherwise it is drawn as a polygon (with the patch
 * colour at all three corners in shaded mode).  Any other mode draws the
 * three outline segments without that test.  process_id is not used.
 */
void display_patch(Patch *patch, long mode, long process_id)
{
    Vertex tri[4];
    Rgb shade[4];

    /* Outline only: no visibility test. */
    if (mode != DISPLAY_SHADED && mode != DISPLAY_FILLED) {
        g_line(&patch->p1, &patch->p2);
        g_line(&patch->p2, &patch->p3);
        g_line(&patch->p3, &patch->p1);
        return;
    }

    if (inner_product(&patch->plane_equ.n, &view_vec) < F_ZERO)
        return;

    tri[0] = patch->p1;
    tri[1] = patch->p2;
    tri[2] = patch->p3;

    if (mode == DISPLAY_SHADED) {
        shade[0] = patch->color;
        shade[1] = patch->color;
        shade[2] = patch->color;
        g_spolygon(3, tri, shade);
    } else {
        g_polygon(3, tri);
    }
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* display_patches_in_bsp_tree()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
void display_patches_in_bsp_tree(long mode, long process_id)
|
||||
{
|
||||
foreach_depth_sorted_patch( &view_vec, display_patch, (long)mode, process_id ) ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* display_element()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/*
 * display_element
 *
 * Draws one element triangle.  In every mode the element is skipped when
 * the inner product of its patch normal and view_vec is below F_ZERO.
 * DISPLAY_SHADED draws a colour-interpolated triangle, DISPLAY_FILLED a
 * polygon in the element radiosity colour, and any other mode the three
 * outline segments.
 */
void display_element(Element *element, long mode, long process_id)
{
    Vertex tri[4];

    if (inner_product(&element->patch->plane_equ.n, &view_vec) < F_ZERO)
        return;

    if (mode == DISPLAY_SHADED) {
        _display_shaded_triangle(element->ev1, element->ev2, element->ev3,
                                 element->e12, element->e23, element->e31,
                                 process_id);
        return;
    }

    if (mode != DISPLAY_FILLED) {
        /* Outline only. */
        g_line(&element->ev1->p, &element->ev2->p);
        g_line(&element->ev2->p, &element->ev3->p);
        g_line(&element->ev3->p, &element->ev1->p);
        return;
    }

    /* Flat fill in the element radiosity colour. */
    g_rgb(element->rad);
    tri[0] = element->ev1->p;
    tri[1] = element->ev2->p;
    tri[2] = element->ev3->p;
    g_polygon(3, tri);
}
|
||||
|
||||
/*
 * Draws a triangle whose corners carry the positions and colours of the
 * three element vertices.  The edge arguments and process_id are not used.
 */
static void _display_shaded_triangle(ElemVertex *ev1, ElemVertex *ev2, ElemVertex *ev3, Edge *e12, Edge *e23, Edge *e31, long process_id)
{
    Vertex corner[4];
    Rgb shade[4];

    corner[0] = ev1->p;
    shade[0] = ev1->col;

    corner[1] = ev2->p;
    shade[1] = ev2->col;

    corner[2] = ev3->p;
    shade[2] = ev3->col;

    g_spolygon(3, corner, shade);
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* display_elements_in_patch()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/*
 * Runs display_element over every leaf element of the patch, then flushes
 * the graphics output.
 */
void display_elements_in_patch(Patch *patch, long mode, long process_id)
{
    foreach_leaf_element_in_patch(patch, display_element, mode, process_id);
    g_flush();
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* display_elements_in_bsp_tree()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
void display_elements_in_bsp_tree(long mode, long process_id)
|
||||
{
|
||||
foreach_depth_sorted_patch( &view_vec, display_elements_in_patch, mode, process_id );
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* display_interactions_in_element()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/*
 * Runs _disp_interactions over every interaction of the element, then
 * flushes the graphics output.
 */
void display_interactions_in_element(Element *elem, long mode, long process_id)
{
    foreach_interaction_in_element(elem, _disp_interactions, mode, process_id);
    g_flush();
}
|
||||
|
||||
|
||||
/*
 * Draws one interaction as a line between the center points of elem and
 * of the destination element of the interaction.  In
 * DISPLAY_HALF_INTERACTIONS mode nothing is drawn when the sequence number
 * of the destination patch is not smaller than that of the patch of elem.
 * process_id is not used.
 */
static void _disp_interactions(Element *elem, Interaction *inter, long mode, long process_id)
{
    Element *target = inter->destination;
    Vertex from, to;

    if (mode == DISPLAY_HALF_INTERACTIONS) {
        if (target->patch->seq_no >= elem->patch->seq_no)
            return;
    }

    /* End points: center of the source and of the destination triangle. */
    center_point(&elem->ev1->p, &elem->ev2->p, &elem->ev3->p, &from);
    center_point(&target->ev1->p, &target->ev2->p, &target->ev3->p, &to);

    g_line(&from, &to);
}
|
||||
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* display_interactions_in_patch
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/*
 * Runs display_interactions_in_element, with the given mode, over every
 * element of the patch.
 */
void display_interactions_in_patch(Patch *patch, long mode, long process_id)
{
    foreach_element_in_patch(patch, display_interactions_in_element, mode, process_id);
}
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* display_interactions_in_bsp_tree
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
void display_interactions_in_bsp_tree(long process_id)
|
||||
{
|
||||
foreach_patch_in_bsp( display_interactions_in_patch, DISPLAY_ALL_INTERACTIONS, process_id ) ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*************************************************************************
|
||||
*
|
||||
* PostScript Version driver
|
||||
*
|
||||
*************************************************************************
|
||||
*************************************************************************/
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* ps_display_scene()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
|
||||
void ps_display_scene(long fill_sw, long patch_sw, long mesh_sw, long interaction_sw, long process_id)
|
||||
{
|
||||
if( fill_sw )
|
||||
{
|
||||
/* Fill surfaces */
|
||||
ps_display_elements_in_bsp_tree( DISPLAY_SHADED, process_id ) ;
|
||||
}
|
||||
if( mesh_sw )
|
||||
{
|
||||
/* Draw mesh */
|
||||
ps_linewidth( 0.5 ) ;
|
||||
ps_display_elements_in_bsp_tree( DISPLAY_EDGEONLY, process_id ) ;
|
||||
}
|
||||
if( patch_sw )
|
||||
{
|
||||
/* Draw patches */
|
||||
ps_linewidth( 1.2 ) ;
|
||||
ps_display_patches_in_bsp_tree( DISPLAY_EDGEONLY, process_id ) ;
|
||||
}
|
||||
if( interaction_sw )
|
||||
{
|
||||
/* Draw interactions */
|
||||
ps_linewidth( 0.2 ) ;
|
||||
ps_display_interactions_in_bsp_tree(process_id) ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* ps_display_patch()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/*
 * Draw one triangular patch to the PostScript output.
 *
 *   DISPLAY_SHADED  - filled with patch->color via ps_spolygon()
 *   DISPLAY_FILLED  - filled via ps_polygon()
 *   anything else   - outline only via ps_polygonedge()
 *
 * In the two filled modes the patch is skipped when the inner product of
 * its plane normal with view_vec is below F_ZERO.  process_id is unused.
 */
void ps_display_patch(Patch *patch, long mode, long process_id)
{
    Vertex corners[4];
    Rgb    shades[4];
    long   k;

    /* Only the filled modes apply the facing test. */
    if ((mode == DISPLAY_SHADED) || (mode == DISPLAY_FILLED)) {
        if (inner_product(&patch->plane_equ.n, &view_vec) < F_ZERO)
            return;
    }

    corners[0] = patch->p1;
    corners[1] = patch->p2;
    corners[2] = patch->p3;

    if (mode == DISPLAY_SHADED) {
        for (k = 0; k < 3; k++)
            shades[k] = patch->color;
        ps_spolygon(3, corners, shades);
    } else if (mode == DISPLAY_FILLED) {
        ps_polygon(3, corners);
    } else {
        ps_polygonedge(3, corners);
    }
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* ps_display_patches_in_bsp_tree()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
void ps_display_patches_in_bsp_tree(long mode, long process_id)
|
||||
{
|
||||
foreach_depth_sorted_patch( &view_vec, ps_display_patch, (long)mode, process_id ) ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* ps_display_element()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/*
 * Draw one triangular element to the PostScript output.
 *
 *   DISPLAY_SHADED  - filled with element->rad via ps_spolygon()
 *   DISPLAY_FILLED  - filled via ps_polygon()
 *   anything else   - outline only via ps_polygonedge()
 *
 * In the two filled modes the element is skipped when the inner product of
 * its patch's plane normal with view_vec is below F_ZERO.  process_id is
 * unused.
 */
void ps_display_element(Element *element, long mode, long process_id)
{
    Vertex corners[4];
    Rgb    shades[4];
    long   k;

    /* Only the filled modes apply the facing test. */
    if ((mode == DISPLAY_SHADED) || (mode == DISPLAY_FILLED)) {
        if (inner_product(&element->patch->plane_equ.n, &view_vec) < F_ZERO)
            return;
    }

    corners[0] = element->ev1->p;
    corners[1] = element->ev2->p;
    corners[2] = element->ev3->p;

    if (mode == DISPLAY_SHADED) {
        for (k = 0; k < 3; k++)
            shades[k] = element->rad;
        ps_spolygon(3, corners, shades);
    } else if (mode == DISPLAY_FILLED) {
        ps_polygon(3, corners);
    } else {
        ps_polygonedge(3, corners);
    }
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* ps_display_elements_in_patch()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/*
 * Apply ps_display_element to the elements of `patch` through
 * foreach_leaf_element_in_patch(), passing mode and process_id through.
 */
void ps_display_elements_in_patch(Patch *patch, long mode, long process_id)
{
    foreach_leaf_element_in_patch(patch,
                                  ps_display_element,
                                  mode,
                                  process_id);
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* ps_display_elements_in_bsp_tree()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
void ps_display_elements_in_bsp_tree(long mode, long process_id)
|
||||
{
|
||||
foreach_depth_sorted_patch( &view_vec, ps_display_elements_in_patch, mode, process_id ) ;
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* ps_display_interactions_in_element()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/*
 * Apply _ps_disp_interactions to the interactions of `elem` through
 * foreach_interaction_in_element(), passing mode and process_id through.
 */
void ps_display_interactions_in_element(Element *elem, long mode, long process_id)
{
    foreach_interaction_in_element(elem,
                                   _ps_disp_interactions,
                                   mode,
                                   process_id);
}
|
||||
|
||||
|
||||
/*
 * Draw one interaction as a line between the centre points of the source
 * element and the interaction's destination element.
 *
 * With mode == DISPLAY_HALF_INTERACTIONS the line is drawn only when the
 * destination patch's seq_no is smaller than the source patch's seq_no.
 * process_id is unused.
 */
static void _ps_disp_interactions(Element *elem, Interaction *inter, long mode, long process_id)
{
    Element *target = inter->destination;
    Vertex   from, to;

    if ((mode == DISPLAY_HALF_INTERACTIONS)
        && (target->patch->seq_no >= elem->patch->seq_no))
        return;

    /* Centre points of both triangles */
    center_point(&elem->ev1->p,   &elem->ev2->p,   &elem->ev3->p,   &from);
    center_point(&target->ev1->p, &target->ev2->p, &target->ev3->p, &to);

    ps_line(&from, &to);
}
|
||||
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* ps_display_interactions_in_patch
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/*
 * Apply ps_display_interactions_in_element to the elements of `patch`
 * through foreach_element_in_patch(), passing mode and process_id through.
 */
void ps_display_interactions_in_patch(Patch *patch, long mode, long process_id)
{
    foreach_element_in_patch(patch,
                             ps_display_interactions_in_element,
                             mode,
                             process_id);
}
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* ps_display_interactions_in_bsp_tree
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
void ps_display_interactions_in_bsp_tree(long process_id)
|
||||
{
|
||||
foreach_patch_in_bsp( ps_display_interactions_in_patch, DISPLAY_ALL_INTERACTIONS, process_id ) ;
|
||||
}
|
||||
|
1432
splash2/codes/apps/radiosity/elemman.C
Normal file
1432
splash2/codes/apps/radiosity/elemman.C
Normal file
File diff suppressed because it is too large
Load diff
15
splash2/codes/apps/radiosity/glibdumb/Makefile
Normal file
15
splash2/codes/apps/radiosity/glibdumb/Makefile
Normal file
|
@ -0,0 +1,15 @@
|
|||
#
|
||||
# Device independent graphics package GLIB.
|
||||
# NULL graphic device version
|
||||
#
|
||||
|
||||
OBJS = glib.o
TARGET = glib.a

include ../../../Makefile.config

# Bundle the null-device objects into a static archive.
$(TARGET): $(OBJS)
	ar crv $(TARGET) $(OBJS)

# 'clean' names an action, not a file; without .PHONY a stray file called
# "clean" in this directory would make the target look up to date.
.PHONY: clean
clean:
	rm -rf $(OBJS) $(TARGET)
|
144
splash2/codes/apps/radiosity/glibdumb/glib.c
Normal file
144
splash2/codes/apps/radiosity/glibdumb/glib.c
Normal file
|
@ -0,0 +1,144 @@
|
|||
/* -*-mode:c-*- */
|
||||
/************************************************************************
|
||||
*
|
||||
* CS348C Radiosity
|
||||
*
|
||||
* (Dirty) extension to YOSSI interface.
|
||||
*
|
||||
*
|
||||
* May 6, 1991
|
||||
* Tsai, Tso-Sheng
|
||||
* Totsuka, Takashi
|
||||
*
|
||||
* Derived from xsupport.c by Yossi Friedman
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
#include "glib.h"
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* Globals
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* g_init()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/* Null-device stub: performs no initialisation and ignores both arguments. */
void g_init(int ac, char *av[])
{
    (void)ac;
    (void)av;
}
|
||||
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* g_start()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/*
 * Null-device stub: ignores every argument and returns immediately.
 * mouse_func is spelled with a (void) parameter list so the definition
 * carries the same prototype as the declaration in glib.h.
 */
void g_start(void (*mouse_func)(void), long n_sliders, slider *slider_def, long n_choices, choice *choice_def)
{
    (void)mouse_func;
    (void)n_sliders;
    (void)slider_def;
    (void)n_choices;
    (void)choice_def;
}
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* g_color()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/* Null-device stub: the requested colour index is ignored. */
void g_color(long color)
{
    (void)color;
}
|
||||
|
||||
void g_rgb( color )
|
||||
|
||||
Rgb color ;
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* g_line()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/* Null-device stub: nothing is drawn and neither end point is read. */
void g_line(Vertex *p1, Vertex *p2)
{
    (void)p1;
    (void)p2;
}
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* g_polygon()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/* Null-device stub: nothing is drawn and the vertex list is not read. */
void g_polygon(long n, Vertex *p_list)
{
    (void)n;
    (void)p_list;
}
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* g_spolygon()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/* Null-device stub: nothing is drawn; vertex and colour lists are not read. */
void g_spolygon(long n, Vertex *p_list, Rgb *c_list)
{
    (void)n;
    (void)p_list;
    (void)c_list;
}
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* g_clear()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/*
 * Null-device stub: there is no screen to clear.
 * Defined with (void) so the definition is a real prototype, matching the
 * declaration in glib.h.
 */
void g_clear(void)
{
}
|
||||
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* g_setup_view()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/* Null-device stub: all four view parameters are ignored. */
void g_setup_view(float rot_x, float rot_y, float dist, float zoom)
{
    (void)rot_x;
    (void)rot_y;
    (void)dist;
    (void)zoom;
}
|
||||
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* g_get_screen_size()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/*
 * Null-device stub: writes nothing through u or v.
 * NOTE(review): callers that read *u / *v afterwards see whatever they held
 * before the call - confirm that no caller relies on them being set.
 */
void g_get_screen_size(long *u, long *v)
{
    (void)u;
    (void)v;
}
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* g_flush()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/*
 * Null-device stub: there is no output to flush.
 * Defined with (void) so the definition is a real prototype, matching the
 * declaration in glib.h.
 */
void g_flush(void)
{
}
|
||||
|
79
splash2/codes/apps/radiosity/glibdumb/glib.h
Normal file
79
splash2/codes/apps/radiosity/glibdumb/glib.h
Normal file
|
@ -0,0 +1,79 @@
|
|||
/* -*-mode:c-*- */
|
||||
/**************************************************************
|
||||
*
|
||||
* CS348C Radiosity
|
||||
*
|
||||
* Device independent graphics package.
|
||||
*
|
||||
* May 6, 1991
|
||||
* Tsai, Tso-Sheng
|
||||
* Totsuka, Takashi
|
||||
*
|
||||
***************************************************************/
|
||||
|
||||
#ifndef _GLIB_H
|
||||
#define _GLIB_H
|
||||
|
||||
#include "../structs.H"
|
||||
|
||||
/****************************************
|
||||
*
|
||||
* Color names
|
||||
*
|
||||
*****************************************/
|
||||
|
||||
/* Named colour codes: eight consecutive values starting at 256. */
#define G_BLACK     (256)
#define G_RED       (257)
#define G_GREEN     (258)
#define G_YELLOW    (259)
#define G_BLUE      (260)
#define G_MAGENTA   (261)
#define G_CYAN      (262)
#define G_WHITE     (263)
|
||||
|
||||
|
||||
|
||||
/****************************************
|
||||
*
|
||||
* Panel data structures
|
||||
*
|
||||
*****************************************/
|
||||
|
||||
|
||||
/*
 * Description of one panel slider, as handed to g_start().
 * (Field roles below are read off the names; the consumer is not visible
 * in this file - confirm against the real graphics back end.)
 */
typedef struct {
    char *name;             /* slider name string */
    long  min;              /* lower bound */
    long  max;              /* upper bound */
    long  init_value;       /* starting value */
    long  ticks;            /* tick setting */
    void (*callback)();     /* handler; argument list left unspecified */
} slider;
|
||||
|
||||
|
||||
/* Capacity of the possibilities[] table in a choice. */
#define MAX_POSSIBILITIES 32

/*
 * Description of one panel choice item, as handed to g_start().
 * (Field roles below are read off the names; the consumer is not visible
 * in this file - confirm against the real graphics back end.)
 */
typedef struct {
    char *name;                                 /* item name string */
    char *possibilities[MAX_POSSIBILITIES];     /* option strings */
    long  init_value;                           /* starting selection */
    void (*callback)();                         /* handler; argument list left unspecified */
} choice;
|
||||
|
||||
/****************************************
|
||||
*
|
||||
* Library function type definition
|
||||
*
|
||||
*****************************************/
|
||||
|
||||
void g_init(int ac, char *av[]);
|
||||
void g_start(void (*mouse_func)(void), long n_sliders, slider *slider_def, long n_choices, choice *choice_def);
|
||||
void g_color(long color);
|
||||
void g_rgb(Rgb color);
|
||||
void g_line(Vertex *p1, Vertex *p2);
|
||||
void g_polygon(long n, Vertex *p_list);
|
||||
void g_spolygon(long n, Vertex *p_list, Rgb *c_list);
|
||||
void g_clear(void);
|
||||
void g_setup_view(float rot_x, float rot_y, float dist, float zoom);
|
||||
void g_get_screen_size(long *u, long *v);
|
||||
void g_flush(void);
|
||||
|
||||
#endif
|
16
splash2/codes/apps/radiosity/glibps/Makefile
Normal file
16
splash2/codes/apps/radiosity/glibps/Makefile
Normal file
|
@ -0,0 +1,16 @@
|
|||
#
|
||||
# CS348C Radiosity Device independent graphics package GLIB.
|
||||
# PostScript version makefile
|
||||
#
|
||||
#
|
||||
|
||||
TARGET = glibps.a
OBJS = glibps.o

include ../../../Makefile.config

# Bundle the PostScript driver objects into a static archive.
$(TARGET): $(OBJS)
	ar crv $(TARGET) $(OBJS)

# 'clean' names an action, not a file; without .PHONY a stray file called
# "clean" in this directory would make the target look up to date.
.PHONY: clean
clean:
	rm -rf *.o $(TARGET)
|
688
splash2/codes/apps/radiosity/glibps/glibps.c
Normal file
688
splash2/codes/apps/radiosity/glibps/glibps.c
Normal file
|
@ -0,0 +1,688 @@
|
|||
/* -*-mode:c-*- */
|
||||
/***************************************************************
|
||||
*
|
||||
* Radiosity
|
||||
*
|
||||
* Graphic driver for PostScript
|
||||
*
|
||||
*
|
||||
***************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "pslib.h"
|
||||
|
||||
/* Output area; the factor 72 is presumably PostScript points per inch. */
#define SCREEN_WIDTH            (6.0*72)
#define SCREEN_HEIGHT           (4.8*72)
#define SCREEN_DEPTH            (65536)
#define ASPECT_RATIO            ((float)SCREEN_WIDTH/(float)SCREEN_HEIGHT)

/* Concatenation modes accepted by the g*_matrix() helpers. */
#define PRE_CAT                 (1)
#define POST_CAT                (0)

/* Default view volume and projection point. */
#define DEFAULT_WINDOW_HEIGHT   (2000.0)
#define DEFAULT_WINDOW_WIDTH    (DEFAULT_WINDOW_HEIGHT*ASPECT_RATIO)
#define DEFAULT_FRONT_PLANE_Z   (2000.0)
#define DEFAULT_BACK_PLANE_Z    (-4000.0)
#define DEFAULT_PRP_Z           (10000.0)   /* Projection point Z coord. */
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* Globals
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
static Matrix trans_mtx ; /* WC -> DC */
|
||||
static Vertex2 prp ; /* Projection point */
|
||||
static Vertex2 active_prp ; /* Projection point in effect (WC) */
|
||||
static float view_rotx, view_roty ; /* Viewing */
|
||||
static float view_zoom ;
|
||||
|
||||
static float clip_right, clip_left ; /* View volume (X) */
|
||||
static float clip_top, clip_bottom ; /* (Y) */
|
||||
static float clip_front, clip_back ; /* (Z) */
|
||||
|
||||
|
||||
static FILE *ps_fd ;
|
||||
|
||||
static void setup_transformation(void);
|
||||
static void init_transformation(void);
|
||||
static void gset_unit_matrix(Matrix *mtx);
|
||||
static void gconcatenate_matrix(long precat, Matrix *m1, Matrix *m2);
|
||||
static void gscale_matrix(long precat, Matrix *m1, float sx, float sy, float sz);
|
||||
static void gtranslate_matrix(long precat, Matrix *m1, float tx, float ty, float tz);
|
||||
static void grotate_x_matrix(long precat, Matrix *m1, float rot);
|
||||
static void grotate_y_matrix(long precat, Matrix *m1, float rot);
|
||||
static void gtransform(Vertex2 *v1, Vertex2 *v2, Matrix *mtx);
|
||||
static void ginverse_matrix(Matrix *m1, Matrix *m2);
|
||||
static double det(Matrix *m);
|
||||
static double cdet(Matrix *m, long r0, long r1, long r2, long c0, long c1, long c2);
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* ps_open()
|
||||
* ps_close()
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
|
||||
|
||||
long ps_open(char *file)
|
||||
{
|
||||
if( (ps_fd = fopen( file, "w" )) == 0 )
|
||||
{
|
||||
perror( file ) ;
|
||||
return( 0 ) ;
|
||||
}
|
||||
|
||||
/* Print out preamble */
|
||||
fprintf( ps_fd, "%%!PS-Adobe-1.0\n" ) ;
|
||||
fprintf( ps_fd, "%%%%EndComments\n" ) ;
|
||||
fprintf( ps_fd, "%%%%Pages: 1\n" ) ;
|
||||
fprintf( ps_fd, "%%%%EndProlog\n" ) ;
|
||||
fprintf( ps_fd, "%%%%Page: 1 1\n" ) ;
|
||||
fprintf( ps_fd, "\n" ) ;
|
||||
|
||||
/* Default line cap/join */
|
||||
fprintf( ps_fd, "1 setlinecap 1 setlinejoin\n" ) ;
|
||||
|
||||
/* Initialize transformation */
|
||||
init_transformation() ;
|
||||
setup_transformation() ;
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void ps_close()
|
||||
{
|
||||
if( ps_fd == 0 )
|
||||
return ;
|
||||
|
||||
|
||||
fprintf( ps_fd, "showpage\n" ) ;
|
||||
fprintf( ps_fd, "%%%%Trailer\n" ) ;
|
||||
fclose( ps_fd ) ;
|
||||
|
||||
ps_fd = 0 ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* ps_linewidth()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
void ps_linewidth(float w)
|
||||
{
|
||||
if( ps_fd == 0 )
|
||||
return ;
|
||||
|
||||
fprintf( ps_fd, "%f setlinewidth\n", w ) ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* ps_line()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/*
 * Stroke a line between two world-space points.  Both points are taken
 * through trans_mtx, divided by w, and written as a moveto/lineto pair.
 * Does nothing when no file is open.
 */
void ps_line(Vertex *p1, Vertex *p2)
{
    Vertex2 head, tail;
    float   head_x, head_y, tail_x, tail_y;

    if (ps_fd == 0)
        return;

    /* Homogeneous copies with w = 1 */
    head.v[0] = p1->x;
    head.v[1] = p1->y;
    head.v[2] = p1->z;
    head.v[3] = 1.0;

    tail.v[0] = p2->x;
    tail.v[1] = p2->y;
    tail.v[2] = p2->z;
    tail.v[3] = 1.0;

    gtransform(&head, &head, &trans_mtx);
    gtransform(&tail, &tail, &trans_mtx);

    /* Perspective divide */
    head_x = head.v[0] / head.v[3];
    head_y = head.v[1] / head.v[3];
    tail_x = tail.v[0] / tail.v[3];
    tail_y = tail.v[1] / tail.v[3];

    fprintf(ps_fd, "newpath\n%f %f moveto\n", head_x, head_y);
    fprintf(ps_fd, "%f %f lineto\nstroke\n", tail_x, tail_y);
}
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* ps_polygonedge()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/*
 * Stroke the closed outline of an n-vertex polygon.  Each vertex is taken
 * through trans_mtx and divided by w; the first becomes a moveto, the rest
 * lineto, followed by "closepath stroke".
 *
 * Does nothing when no file is open, or when the list is empty (n < 1 or
 * p_list is null) - there is no first vertex to read in that case.
 */
void ps_polygonedge(long n, Vertex *p_list)
{
    Vertex2 v;
    float   dcx, dcy;
    long    i;

    if (ps_fd == 0)
        return;

    if (p_list == 0 || n < 1)
        return;

    fprintf(ps_fd, "newpath\n");

    for (i = 0; i < n; i++) {
        /* Transform */
        v.v[0] = p_list[i].x;
        v.v[1] = p_list[i].y;
        v.v[2] = p_list[i].z;
        v.v[3] = 1.0;
        gtransform(&v, &v, &trans_mtx);
        dcx = v.v[0] / v.v[3];
        dcy = v.v[1] / v.v[3];

        if (i == 0)
            fprintf(ps_fd, "%f %f moveto\n", dcx, dcy);
        else
            fprintf(ps_fd, "%f %f lineto\n", dcx, dcy);
    }

    fprintf(ps_fd, "closepath stroke\n");
}
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* ps_polygon()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/*
 * Fill an n-vertex polygon with the current colour.  Each vertex is taken
 * through trans_mtx and divided by w; the first becomes a moveto, the rest
 * lineto, followed by "closepath fill".
 *
 * Does nothing when no file is open, or when the list is empty (n < 1 or
 * p_list is null) - there is no first vertex to read in that case.
 */
void ps_polygon(long n, Vertex *p_list)
{
    Vertex2 v;
    float   dcx, dcy;
    long    i;

    if (ps_fd == 0)
        return;

    if (p_list == 0 || n < 1)
        return;

    fprintf(ps_fd, "newpath\n");

    for (i = 0; i < n; i++) {
        /* Transform */
        v.v[0] = p_list[i].x;
        v.v[1] = p_list[i].y;
        v.v[2] = p_list[i].z;
        v.v[3] = 1.0;
        gtransform(&v, &v, &trans_mtx);
        dcx = v.v[0] / v.v[3];
        dcy = v.v[1] / v.v[3];

        if (i == 0)
            fprintf(ps_fd, "%f %f moveto\n", dcx, dcy);
        else
            fprintf(ps_fd, "%f %f lineto\n", dcx, dcy);
    }

    fprintf(ps_fd, "closepath fill\n");
}
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* ps_spolygon()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/*
 * Fill an n-vertex polygon with a gray level.  The path is built exactly
 * as in ps_polygon(); the gray level is the g component of c_list[0]
 * clamped to [0, 1] - the other colours and components are not used.
 *
 * Does nothing when no file is open, or when the input is empty (n < 1,
 * or p_list / c_list is null) - there is no first vertex or colour to read
 * in that case.
 */
void ps_spolygon(long n, Vertex *p_list, Rgb *c_list)
{
    Vertex2 v;
    float   dcx, dcy;
    float   gray_scale;
    long    i;

    if (ps_fd == 0)
        return;

    if (p_list == 0 || c_list == 0 || n < 1)
        return;

    fprintf(ps_fd, "newpath\n");

    for (i = 0; i < n; i++) {
        /* Transform */
        v.v[0] = p_list[i].x;
        v.v[1] = p_list[i].y;
        v.v[2] = p_list[i].z;
        v.v[3] = 1.0;
        gtransform(&v, &v, &trans_mtx);
        dcx = v.v[0] / v.v[3];
        dcy = v.v[1] / v.v[3];

        if (i == 0)
            fprintf(ps_fd, "%f %f moveto\n", dcx, dcy);
        else
            fprintf(ps_fd, "%f %f lineto\n", dcx, dcy);
    }

    /* setgray takes a value in [0, 1] */
    gray_scale = c_list[0].g;
    if (gray_scale > 1.0)
        gray_scale = 1.0;
    else if (gray_scale < 0.0)
        gray_scale = 0.0;

    fprintf(ps_fd, "closepath %f setgray fill\n", gray_scale);
}
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* ps_clear()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
/*
 * No-op: the PostScript driver emits nothing for a clear request.
 * Defined with (void) so the definition is a real prototype, matching the
 * declaration in pslib.h.
 */
void ps_clear(void)
{
}
|
||||
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* ps_setup_view()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
void ps_setup_view(float rot_x, float rot_y, float dist, float zoom)
|
||||
{
|
||||
prp.v[0] = 0.0 ;
|
||||
prp.v[1] = 0.0 ;
|
||||
prp.v[2] = (float)dist ;
|
||||
prp.v[3] = 0.0 ;
|
||||
view_rotx = rot_x ;
|
||||
view_roty = rot_y ;
|
||||
view_zoom = zoom ;
|
||||
|
||||
setup_transformation() ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* setup_transformation()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
static void setup_transformation()
|
||||
{
|
||||
float cf_z, cb_z ;
|
||||
Matrix pmat ;
|
||||
|
||||
/* Set to unit matrix */
|
||||
gset_unit_matrix( &trans_mtx ) ;
|
||||
|
||||
/* View orientation matrix */
|
||||
grotate_x_matrix( POST_CAT, &trans_mtx, view_rotx ) ;
|
||||
grotate_y_matrix( POST_CAT, &trans_mtx, view_roty ) ;
|
||||
|
||||
/* Compute active (currently effective) projection point */
|
||||
ginverse_matrix( &pmat, &trans_mtx ) ;
|
||||
gtransform( &active_prp, &prp, &pmat ) ;
|
||||
|
||||
/* Perspective projection */
|
||||
gset_unit_matrix( &pmat ) ;
|
||||
pmat.m[2][3] = - 1 / prp.v[2] ;
|
||||
gconcatenate_matrix( POST_CAT, &trans_mtx, &pmat ) ;
|
||||
|
||||
cf_z = prp.v[2] * clip_front / ( prp.v[2] - clip_front ) ;
|
||||
cb_z = prp.v[2] * clip_back / ( prp.v[2] - clip_back ) ;
|
||||
|
||||
/* Window-Viewport */
|
||||
gscale_matrix( POST_CAT, &trans_mtx,
|
||||
(float)SCREEN_WIDTH / (clip_right - clip_left),
|
||||
(float)SCREEN_HEIGHT / (clip_top - clip_bottom),
|
||||
(float)SCREEN_DEPTH / (cf_z - cb_z) ) ;
|
||||
|
||||
gtranslate_matrix( POST_CAT, &trans_mtx,
|
||||
-(float)SCREEN_WIDTH * clip_left / (clip_right - clip_left),
|
||||
-(float)SCREEN_HEIGHT* clip_top / (clip_bottom - clip_top),
|
||||
-(float)SCREEN_DEPTH * cb_z / (cf_z - cb_z) ) ;
|
||||
|
||||
gtranslate_matrix( POST_CAT, &trans_mtx,
|
||||
(float)(1.0*72), (float)(0.5*72), 0 ) ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**************************************************
|
||||
*
|
||||
* init_transformation()
|
||||
*
|
||||
***************************************************/
|
||||
|
||||
static void init_transformation()
|
||||
{
|
||||
/* Initialize matrix, just in case */
|
||||
gset_unit_matrix( &trans_mtx ) ;
|
||||
|
||||
/* Initialize Projection point */
|
||||
prp.v[0] = 0.0 ;
|
||||
prp.v[1] = 0.0 ;
|
||||
prp.v[2] = DEFAULT_PRP_Z ;
|
||||
prp.v[3] = 0.0 ;
|
||||
|
||||
/* Viewing */
|
||||
view_rotx = view_roty = 0.0 ;
|
||||
view_zoom = 1.0 ;
|
||||
|
||||
/* Initialize view volume boundary */
|
||||
clip_right = DEFAULT_WINDOW_WIDTH / 2.0 ;
|
||||
clip_left = -DEFAULT_WINDOW_WIDTH / 2.0 ;
|
||||
clip_top = DEFAULT_WINDOW_HEIGHT / 2.0 ;
|
||||
clip_bottom= -DEFAULT_WINDOW_HEIGHT / 2.0 ;
|
||||
clip_front = DEFAULT_FRONT_PLANE_Z ;
|
||||
clip_back = DEFAULT_BACK_PLANE_Z ;
|
||||
}
|
||||
|
||||
|
||||
/********************************************
|
||||
*
|
||||
* set_unit_matrix()
|
||||
*
|
||||
*********************************************/
|
||||
|
||||
/* Overwrite *mtx with the 4x4 identity matrix. */
static void gset_unit_matrix(Matrix *mtx)
{
    long r, c;

    for (r = 0; r < 4; r++) {
        for (c = 0; c < 4; c++) {
            mtx->m[r][c] = (r == c) ? 1.0 : 0.0;
        }
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/********************************************
|
||||
*
|
||||
* concatenate_matrix()
|
||||
*
|
||||
* m1 <- m2 * m1 (precat = 1)
|
||||
* m1 <- m1 * m2 (precat = 0)
|
||||
*
|
||||
*********************************************/
|
||||
|
||||
/*
 * Multiply two 4x4 matrices and store the product in *m1.
 *
 *   precat == 1 :  m1 <- m2 * m1
 *   otherwise   :  m1 <- m1 * m2
 *
 * The product is built in a temporary, so m1 is safe to use as both an
 * operand and the destination.
 */
static void gconcatenate_matrix(long precat, Matrix *m1, Matrix *m2)
{
    Matrix *const result = m1;
    Matrix *lhs = m1;
    Matrix *rhs = m2;
    Matrix  product;
    long    r, c, k;

    /* Pre-concatenation puts m2 on the left. */
    if (precat == 1) {
        lhs = m2;
        rhs = m1;
    }

    for (r = 0; r < 4; r++) {
        for (c = 0; c < 4; c++) {
            product.m[r][c] = 0.0;
            for (k = 0; k < 4; k++) {
                product.m[r][c] += lhs->m[r][k] * rhs->m[k][c];
            }
        }
    }

    *result = product;
}
|
||||
|
||||
|
||||
/********************************************
|
||||
*
|
||||
* scale_matrix()
|
||||
*
|
||||
* m1 <- SCALE * m1 (precat = 1)
|
||||
* m1 <- m1 * SCALE (precat = 0)
|
||||
*
|
||||
*********************************************/
|
||||
|
||||
/*
 * Concatenate a scale by (sx, sy, sz) onto *m1.
 *
 *   precat == 1 :  m1 <- SCALE * m1
 *   otherwise   :  m1 <- m1 * SCALE
 */
static void gscale_matrix(long precat, Matrix *m1, float sx, float sy, float sz)
{
    /* Identity; the diagonal is filled in below. */
    Matrix scale = {{
        { 1.0f, 0.0f, 0.0f, 0.0f },
        { 0.0f, 1.0f, 0.0f, 0.0f },
        { 0.0f, 0.0f, 1.0f, 0.0f },
        { 0.0f, 0.0f, 0.0f, 1.0f }
    }};

    scale.m[0][0] = sx;
    scale.m[1][1] = sy;
    scale.m[2][2] = sz;

    gconcatenate_matrix(precat, m1, &scale);
}
|
||||
|
||||
|
||||
|
||||
/********************************************
|
||||
*
|
||||
* translate_matrix()
|
||||
*
|
||||
* m1 <- T * m1 (precat = 1)
|
||||
* m1 <- m1 * T (precat = 0)
|
||||
*
|
||||
*********************************************/
|
||||
|
||||
/*
 * Concatenate a translation by (tx, ty, tz) onto *m1.
 *
 *   precat == 1 :  m1 <- T * m1
 *   otherwise   :  m1 <- m1 * T
 *
 * The offsets sit in the bottom row because points are row vectors
 * (see gtransform()).
 */
static void gtranslate_matrix(long precat, Matrix *m1, float tx, float ty, float tz)
{
    /* Identity; the translation row is filled in below. */
    Matrix shift = {{
        { 1.0f, 0.0f, 0.0f, 0.0f },
        { 0.0f, 1.0f, 0.0f, 0.0f },
        { 0.0f, 0.0f, 1.0f, 0.0f },
        { 0.0f, 0.0f, 0.0f, 1.0f }
    }};

    shift.m[3][0] = tx;
    shift.m[3][1] = ty;
    shift.m[3][2] = tz;

    gconcatenate_matrix(precat, m1, &shift);
}
|
||||
|
||||
|
||||
|
||||
/********************************************
|
||||
*
|
||||
* rotate_x_matrix()
|
||||
* rotate_y_matrix()
|
||||
* rotate_z_matrix()
|
||||
*
|
||||
* m1 <- ROT * m1 (precat = 1)
|
||||
* m1 <- m1 * ROT (precat = 0)
|
||||
*
|
||||
*********************************************/
|
||||
|
||||
/*
 * Concatenate a rotation about the X axis onto *m1; `rot` is in degrees.
 *
 *   precat == 1 :  m1 <- ROT * m1
 *   otherwise   :  m1 <- m1 * ROT
 */
static void grotate_x_matrix(long precat, Matrix *m1, float rot)
{
    /* Identity; the Y/Z sub-block is filled in below. */
    Matrix spin = {{
        { 1.0f, 0.0f, 0.0f, 0.0f },
        { 0.0f, 1.0f, 0.0f, 0.0f },
        { 0.0f, 0.0f, 1.0f, 0.0f },
        { 0.0f, 0.0f, 0.0f, 1.0f }
    }};
    const double radians = rot * M_PI / 180.0;
    const float  sine    = sin(radians);
    const float  cosine  = cos(radians);

    spin.m[1][1] = cosine;
    spin.m[1][2] = sine;
    spin.m[2][1] = -sine;
    spin.m[2][2] = cosine;

    gconcatenate_matrix(precat, m1, &spin);
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Concatenate a rotation about the Y axis onto *m1; `rot` is in degrees.
 *
 *   precat == 1 :  m1 <- ROT * m1
 *   otherwise   :  m1 <- m1 * ROT
 */
static void grotate_y_matrix(long precat, Matrix *m1, float rot)
{
    /* Identity; the X/Z sub-block is filled in below. */
    Matrix spin = {{
        { 1.0f, 0.0f, 0.0f, 0.0f },
        { 0.0f, 1.0f, 0.0f, 0.0f },
        { 0.0f, 0.0f, 1.0f, 0.0f },
        { 0.0f, 0.0f, 0.0f, 1.0f }
    }};
    const double radians = rot * M_PI / 180.0;
    const float  sine    = sin(radians);
    const float  cosine  = cos(radians);

    spin.m[0][0] = cosine;
    spin.m[0][2] = -sine;
    spin.m[2][0] = sine;
    spin.m[2][2] = cosine;

    gconcatenate_matrix(precat, m1, &spin);
}
|
||||
|
||||
|
||||
|
||||
|
||||
/********************************************
|
||||
*
|
||||
* transform()
|
||||
*
|
||||
* v1 <- v2 * mtx
|
||||
*
|
||||
*********************************************/
|
||||
|
||||
/*
 * Row-vector transform:  v1 <- v2 * mtx.
 *
 * All four results are computed before anything is stored, so v1 and v2
 * may point at the same vertex.
 */
static void gtransform(Vertex2 *v1, Vertex2 *v2, Matrix *mtx)
{
    float out[4];
    long  r, c;

    for (c = 0; c < 4; c++) {
        out[c] = v2->v[0] * mtx->m[0][c];
    }
    for (r = 1; r < 4; r++) {
        for (c = 0; c < 4; c++) {
            out[c] += v2->v[r] * mtx->m[r][c];
        }
    }

    for (c = 0; c < 4; c++) {
        v1->v[c] = out[c];
    }
}
|
||||
|
||||
|
||||
/********************************************
|
||||
*
|
||||
* inverse_matrix()
|
||||
*
|
||||
* m1 <- inv(m2)
|
||||
*
|
||||
*********************************************/
|
||||
|
||||
|
||||
/*
 * m1 <- inverse(m2), by Cramer's rule: entry [i][j] is the signed 3x3
 * minor of m2 with row j and column i removed, divided by det(m2).
 *
 * There is no check for a zero determinant, and m2 is read while m1 is
 * being written, so the two must be different matrices.
 */
static void ginverse_matrix(Matrix *m1, Matrix *m2)
{
    const double detval = det(m2);
    long   rows[3], cols[3];
    long   i, j, k, n;
    double minor;

    for (i = 0; i < 4; i++) {
        /* Columns of the minor: everything except column i */
        for (n = 0, k = 0; k < 4; k++) {
            if (k != i)
                cols[n++] = k;
        }

        for (j = 0; j < 4; j++) {
            /* Rows of the minor: everything except row j */
            for (n = 0, k = 0; k < 4; k++) {
                if (k != j)
                    rows[n++] = k;
            }

            minor = cdet(m2, rows[0], rows[1], rows[2],
                             cols[0], cols[1], cols[2]);

            /* Checkerboard sign: negative where i + j is odd */
            if ((i + j) % 2 != 0)
                m1->m[i][j] = -minor / detval;
            else
                m1->m[i][j] = minor / detval;
        }
    }
}
|
||||
|
||||
|
||||
|
||||
/*
 * Determinant of a 4x4 matrix by cofactor expansion along the last column
 * (index 3).  Rows whose last-column entry is exactly zero are skipped.
 */
static double det(Matrix *m)
{
    /* For each row r, the three rows that remain once r is removed. */
    static const long remaining[4][3] = {
        { 1, 2, 3 },
        { 0, 2, 3 },
        { 0, 1, 3 },
        { 0, 1, 2 }
    };
    double total = 0.0;
    double minor;
    long   r;

    for (r = 0; r < 4; r++) {
        if (m->m[r][3] != 0.0) {
            minor = cdet(m, remaining[r][0], remaining[r][1], remaining[r][2],
                            0, 1, 2);
            /* Sign (-1)^(r+3): minus for even r, plus for odd r */
            if (r % 2 == 0)
                total -= m->m[r][3] * minor;
            else
                total += m->m[r][3] * minor;
        }
    }

    return total;
}
|
||||
|
||||
|
||||
/*
 * Determinant of the 3x3 sub-matrix of *m picked out by rows (r0, r1, r2)
 * and columns (c0, c1, c2), by the rule of Sarrus.
 */
static double cdet(Matrix *m, long r0, long r1, long r2, long c0, long c1, long c2)
{
    /* Sub-matrix entries, named row by row:  a b c / d e f / g h i */
    const float a = m->m[r0][c0], b = m->m[r0][c1], c = m->m[r0][c2];
    const float d = m->m[r1][c0], e = m->m[r1][c1], f = m->m[r1][c2];
    const float g = m->m[r2][c0], h = m->m[r2][c1], i = m->m[r2][c2];
    double sum;

    /* Positive diagonals */
    sum  = a * e * i;
    sum += d * h * c;
    sum += g * b * f;

    /* Negative diagonals */
    sum -= g * e * c;
    sum -= d * b * i;
    sum -= a * h * f;

    return sum;
}
|
47
splash2/codes/apps/radiosity/glibps/pslib.h
Normal file
47
splash2/codes/apps/radiosity/glibps/pslib.h
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* -*-mode:c-*- */
|
||||
/**************************************************************
|
||||
*
|
||||
* CS348C Radiosity
|
||||
*
|
||||
* Device independent graphics package.
|
||||
*
|
||||
* May 6, 1991
|
||||
* Tsai, Tso-Sheng
|
||||
* Totsuka, Takashi
|
||||
*
|
||||
***************************************************************/
|
||||
|
||||
#ifndef _PSLIB_H
|
||||
#define _PSLIB_H
|
||||
|
||||
#include "../structs.H"
|
||||
|
||||
/* Many <math.h> implementations already supply M_PI; only provide a
   fallback when it is missing so the two definitions can never clash. */
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float v[4] ; /* x, y, z, and w */
|
||||
} Vertex2;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
float m[4][4] ; /* m[row][column], row vector assumed */
|
||||
} Matrix;
|
||||
|
||||
/****************************************
|
||||
*
|
||||
* Library function type definition
|
||||
*
|
||||
*****************************************/
|
||||
|
||||
long ps_open(char *file);
|
||||
void ps_close(void);
|
||||
void ps_linewidth(float w);
|
||||
void ps_line(Vertex *p1, Vertex *p2);
|
||||
void ps_polygonedge(long n, Vertex *p_list);
|
||||
void ps_polygon(long n, Vertex *p_list);
|
||||
void ps_spolygon(long n, Vertex *p_list, Rgb *c_list);
|
||||
void ps_clear(void);
|
||||
void ps_setup_view(float rot_x, float rot_y, float dist, float zoom);
|
||||
|
||||
#endif
|
BIN
splash2/codes/apps/radiosity/libpthread.a
Normal file
BIN
splash2/codes/apps/radiosity/libpthread.a
Normal file
Binary file not shown.
BIN
splash2/codes/apps/radiosity/m5op_x86.o
Normal file
BIN
splash2/codes/apps/radiosity/m5op_x86.o
Normal file
Binary file not shown.
76
splash2/codes/apps/radiosity/model.H
Normal file
76
splash2/codes/apps/radiosity/model.H
Normal file
|
@ -0,0 +1,76 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/* Header file for model data structures and definitions */
|
||||
|
||||
#ifndef _MODEL_H
|
||||
#define _MODEL_H
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Constants
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
#define MODEL_TRIANGLE (0)
|
||||
#define MODEL_RECTANGLE (1)
|
||||
#define MODEL_NULL (-1)
|
||||
|
||||
#define MODEL_TEST_DATA (0)
|
||||
#define MODEL_ROOM_DATA (1)
|
||||
#define MODEL_LARGEROOM_DATA (2)
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Model descriptor
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/* General structure of the model descriptor */
|
||||
typedef struct {
|
||||
Rgb color ; /* Diffuse color */
|
||||
Rgb emittance ; /* Radiant emittance */
|
||||
Vertex _dummy[4] ; /* Placeholder vertex storage; concrete variants such as Model_Triangle are reached by casting a Model pointer (see process_model). NOTE(review): presumably sized for the largest variant -- confirm */
|
||||
} Model ;
|
||||
|
||||
/* Triangle */
|
||||
typedef struct {
|
||||
Rgb color ; /* Diffuse color */
|
||||
Rgb emittance ; /* Radiant emittance */
|
||||
Vertex p1, p2, p3 ;
|
||||
} Model_Triangle ;
|
||||
|
||||
typedef Model_Triangle Model_Rectangle ;
|
||||
|
||||
|
||||
typedef struct {
|
||||
long type ;
|
||||
Model model ;
|
||||
} ModelDataBase ;
|
||||
|
||||
/*
|
||||
* modelman.C
|
||||
*/
|
||||
void init_modeling_tasks(long process_id);
|
||||
void process_model(Model *model, long type, long process_id);
|
||||
|
||||
extern long model_selector ;
|
||||
|
||||
#endif
|
||||
|
||||
|
296
splash2/codes/apps/radiosity/modelman.C
Normal file
296
splash2/codes/apps/radiosity/modelman.C
Normal file
|
@ -0,0 +1,296 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Model definition & management.
|
||||
*
|
||||
* This module has the following functions:
|
||||
* (1) Create modeling tasks.
|
||||
* (2) Process modeling tasks.
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
EXTERN_ENV;
|
||||
|
||||
include(radiosity.h)
|
||||
|
||||
static void init_test_model_tasks(long process_id);
|
||||
static void init_room_model_tasks(ModelDataBase *model, long process_id);
|
||||
static void process_triangle(Model_Triangle *model, long process_id);
|
||||
static void process_rectangle(Model_Rectangle *model, long process_id);
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* Test Model definitions.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#define WHITE { 0.9, 0.9, 0.9 }
|
||||
#define GREEN { 0.1, 0.9, 0.1 }
|
||||
#define PASSIVE { 0.0, 0.0, 0.0 }
|
||||
#define WHITE_LIGHT { 200.0, 200.0, 200.0 }
|
||||
|
||||
Model_Triangle tri1 = { WHITE,
|
||||
PASSIVE,
|
||||
{ 0.0, 0.0, 0.0 },
|
||||
{ 100.0, 0.0, 0.0 },
|
||||
{ 0.0, 100.0, 0.0 } } ;
|
||||
|
||||
Model_Triangle tri2 = { WHITE,
|
||||
PASSIVE,
|
||||
{ 0.0, 0.0, 100.0 },
|
||||
{ 100.0, 100.0, 0.0 },
|
||||
{ 0.0, 0.0, -100.0 } } ;
|
||||
|
||||
Model_Rectangle Floor = { WHITE,
|
||||
PASSIVE,
|
||||
{ 600.0, -600.0, 700.0 },
|
||||
{ 600.0, -600.0, -700.0 },
|
||||
{ -800.0, -600.0, 700.0 } } ;
|
||||
|
||||
Model_Rectangle Ceiling = { WHITE,
|
||||
PASSIVE,
|
||||
{ 600.0, 600.0, 700.0 },
|
||||
{ -800.0, 600.0, 700.0 },
|
||||
{ 600.0, 600.0, -700.0 } } ;
|
||||
|
||||
Model_Rectangle wall1 = { WHITE,
|
||||
PASSIVE,
|
||||
{ -800.0, -600.0, 700.0 },
|
||||
{ -800.0, -600.0, -700.0 },
|
||||
{ -800.0, 600.0, 700.0 } } ;
|
||||
|
||||
Model_Rectangle wall2 = { WHITE,
|
||||
PASSIVE,
|
||||
{ 600.0, -600.0, -700.0 },
|
||||
{ 600.0, 600.0, -700.0 },
|
||||
{ -800.0, -600.0, -700.0 } } ;
|
||||
|
||||
Model_Rectangle deskTop = { GREEN,
|
||||
PASSIVE,
|
||||
{ -795.0, -320.0, 300.0 },
|
||||
{ -400.0, -320.0, 300.0 },
|
||||
{ -795.0, -320.0, -300.0 } } ;
|
||||
|
||||
Model_Rectangle deskBtm = { GREEN,
|
||||
PASSIVE,
|
||||
{ -795.0, -340.0, 300.0 },
|
||||
{ -795.0, -340.0, -300.0 },
|
||||
{ -400.0, -340.0, 300.0 } } ;
|
||||
|
||||
Model_Rectangle light1 = { WHITE,
|
||||
WHITE_LIGHT,
|
||||
{ -795.0, -50.0, 50.0 },
|
||||
{ -795.0, -50.0, -50.0 },
|
||||
{ -795.0, 50.0, 50.0 } } ;
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
****************************************************************************
|
||||
*
|
||||
* Methods of Model object
|
||||
*
|
||||
****************************************************************************
|
||||
****************************************************************************/
|
||||
/***************************************************************************
|
||||
*
|
||||
* init_modeling_tasks()
|
||||
*
|
||||
* Initialize modeling task.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
long model_selector = MODEL_TEST_DATA ;
|
||||
|
||||
void init_modeling_tasks(long process_id)
|
||||
{
|
||||
|
||||
extern ModelDataBase room_model[] ;
|
||||
extern ModelDataBase largeroom_model[] ;
|
||||
|
||||
if( ! check_task_counter() )
|
||||
return ;
|
||||
|
||||
switch( model_selector )
|
||||
{
|
||||
case MODEL_TEST_DATA:
|
||||
default:
|
||||
init_test_model_tasks(process_id) ;
|
||||
break ;
|
||||
case MODEL_ROOM_DATA:
|
||||
init_room_model_tasks( room_model, process_id ) ;
|
||||
break ;
|
||||
case MODEL_LARGEROOM_DATA:
|
||||
init_room_model_tasks( largeroom_model, process_id ) ;
|
||||
break ;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void init_test_model_tasks(long process_id)
|
||||
{
|
||||
create_modeling_task( (Model*)&Floor, MODEL_RECTANGLE, process_id ) ;
|
||||
create_modeling_task( (Model*)&Ceiling, MODEL_RECTANGLE, process_id ) ;
|
||||
create_modeling_task( (Model*)&wall1, MODEL_RECTANGLE, process_id ) ;
|
||||
create_modeling_task( (Model*)&wall2, MODEL_RECTANGLE, process_id ) ;
|
||||
create_modeling_task( (Model*)&deskTop, MODEL_RECTANGLE, process_id ) ;
|
||||
create_modeling_task( (Model*)&deskBtm, MODEL_RECTANGLE, process_id ) ;
|
||||
create_modeling_task( (Model*)&light1, MODEL_RECTANGLE, process_id ) ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Queue a modeling task for every entry of a model table.  The table is
 * walked from its first entry up to, but not including, the entry whose
 * type is MODEL_NULL.
 */
static void init_room_model_tasks(ModelDataBase *model, long process_id)
{
    ModelDataBase *entry = model ;

    while( entry->type != MODEL_NULL )
    {
        create_modeling_task( &entry->model, entry->type, process_id ) ;
        entry++ ;
    }
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* process_model()
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Dispatch a model descriptor to the handler matching its type tag.
 * An unrecognised tag is reported on stderr and otherwise ignored.
 */
void process_model(Model *model, long type, long process_id)
{
    if( type == MODEL_TRIANGLE )
    {
        process_triangle( (Model_Triangle *)model, process_id ) ;
        return ;
    }

    if( type == MODEL_RECTANGLE )
    {
        process_rectangle( (Model_Rectangle *)model, process_id ) ;
        return ;
    }

    fprintf( stderr, "Panic:process_model:Illegal type %ld\n", type ) ;
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* process_triangle()
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Turn a triangle model into a single Patch and queue it for insertion
 * into the BSP tree rooted at global->bsp_root.
 */
static void process_triangle(Model_Triangle *model, long process_id)
{
    Patch *patch ;
    float  plane_mag ;

    patch = get_patch(process_id) ;

    /* Corner coordinates are copied straight from the model */
    patch->p1 = model->p1 ;
    patch->p2 = model->p2 ;
    patch->p3 = model->p3 ;

    /* One element vertex per corner, then the three edges joining them */
    patch->ev1 = create_elemvertex( &patch->p1, process_id ) ;
    patch->ev2 = create_elemvertex( &patch->p2, process_id ) ;
    patch->ev3 = create_elemvertex( &patch->p3, process_id ) ;
    patch->e12 = create_edge( patch->ev1, patch->ev2, process_id ) ;
    patch->e23 = create_edge( patch->ev2, patch->ev3, process_id ) ;
    patch->e31 = create_edge( patch->ev3, patch->ev1, process_id ) ;

    /* Plane equation; half of the value it returns is stored as the area */
    plane_mag = comp_plane_equ( &patch->plane_equ,
                                &model->p1, &model->p2, &model->p3,
                                process_id ) ;
    patch->area      = (float)0.5 * plane_mag ;
    patch->color     = model->color ;
    patch->emittance = model->emittance ;

    /* Hand the finished patch over for BSP insertion */
    create_bsp_task( patch, global->bsp_root, process_id ) ;
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* process_rectangle()
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Turn a rectangle model into two triangular patches and queue both for
 * BSP insertion.  The model supplies three corners P1, P2, P3; the fourth
 * corner is computed as P2 + P3 - P1.  The two patches share the element
 * vertices at P2 and P3 and the edge between them.
 */
static void process_rectangle(Model_Rectangle *model, long process_id)
{
    Patch *first ;       /* triangle P1 - P2 - P3            */
    Patch *second ;      /* triangle (fourth corner) - P3 - P2 */
    float  plane_mag ;

    /* ---- First triangle: corners come directly from the model ---- */
    first = get_patch(process_id) ;

    first->p1 = model->p1 ;
    first->p2 = model->p2 ;
    first->p3 = model->p3 ;

    first->ev1 = create_elemvertex( &first->p1, process_id ) ;
    first->ev2 = create_elemvertex( &first->p2, process_id ) ;
    first->ev3 = create_elemvertex( &first->p3, process_id ) ;
    first->e12 = create_edge( first->ev1, first->ev2, process_id ) ;
    first->e23 = create_edge( first->ev2, first->ev3, process_id ) ;
    first->e31 = create_edge( first->ev3, first->ev1, process_id ) ;

    /* Half of the value returned by comp_plane_equ is stored as the area */
    plane_mag = comp_plane_equ( &first->plane_equ,
                                &model->p1, &model->p2, &model->p3,
                                process_id ) ;
    first->area      = (float)0.5 * plane_mag ;
    first->color     = model->color ;
    first->emittance = model->emittance ;

    /* ---- Second triangle: fourth corner plus the shared P3, P2 ---- */
    second = get_patch(process_id) ;

    second->p1.x = model->p2.x + model->p3.x - model->p1.x ;
    second->p1.y = model->p2.y + model->p3.y - model->p1.y ;
    second->p1.z = model->p2.z + model->p3.z - model->p1.z ;
    second->p2   = model->p3 ;
    second->p3   = model->p2 ;

    /* Only the new corner needs a fresh element vertex; the other two
       vertices and the diagonal edge are reused from the first patch. */
    second->ev1 = create_elemvertex( &second->p1, process_id ) ;
    second->ev2 = first->ev3 ;
    second->ev3 = first->ev2 ;
    second->e12 = create_edge( second->ev1, second->ev2, process_id ) ;
    second->e23 = first->e23 ;
    second->e31 = create_edge( second->ev3, second->ev1, process_id ) ;

    /* Plane, area and surface properties are copied from the first patch */
    second->plane_equ = first->plane_equ ;
    second->area      = first->area ;
    second->color     = first->color ;
    second->emittance = first->emittance ;

    /* Queue both halves for BSP insertion */
    create_bsp_task( first,  global->bsp_root, process_id ) ;
    create_bsp_task( second, global->bsp_root, process_id ) ;
}
|
||||
|
||||
|
67
splash2/codes/apps/radiosity/parallel.H
Normal file
67
splash2/codes/apps/radiosity/parallel.H
Normal file
|
@ -0,0 +1,67 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/**************************************************************
|
||||
*
|
||||
* Definitions relevant to parallel processing
|
||||
*
|
||||
***************************************************************/
|
||||
|
||||
#ifndef _PARALLEL_H
|
||||
#define _PARALLEL_H
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* Shared lock variable
|
||||
*
|
||||
* Some machines provide only a limited number of lock variables. This
|
||||
* data structure allows sharing of these lock variables.
|
||||
* The shared locks are divided into 2 segments so that different types of
|
||||
* objects are given different locks.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
LOCKDEC(lock)
|
||||
} Shared_Lock ;
|
||||
|
||||
#define SHARED_LOCK_SEG_SIZE (MAX_SHARED_LOCK / 2)
|
||||
|
||||
#define SHARED_LOCK_SEG0 (0)
|
||||
#define SHARED_LOCK_SEG1 (1)
|
||||
#define SHARED_LOCK_SEGANY (2)
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Memory Consistency Model of the machine
|
||||
*
|
||||
* Some macros change their behavior based on the memory consistency model
|
||||
*
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
/* Set one(1) to the model used in the machine. Set only one of these
|
||||
at a time */
|
||||
|
||||
#define MEM_CONSISTENCY_RELEASE (0)
|
||||
#define MEM_CONSISTENCY_WEAK (0)
|
||||
#define MEM_CONSISTENCY_PROCESSOR (1)
|
||||
|
||||
#endif
|
||||
|
316
splash2/codes/apps/radiosity/patch.H
Normal file
316
splash2/codes/apps/radiosity/patch.H
Normal file
|
@ -0,0 +1,316 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
|
||||
#ifndef _PATCH_H
|
||||
#define _PATCH_H
|
||||
|
||||
#include "structs.H"
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Constants
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
#define F_COPLANAR (5.0e-2) /* H(P) < F_COPLANAR then P is on the plane */
|
||||
#define N_VISIBILITY_TEST_RAYS (10) /* number of "random", "magic" rays fired
|
||||
between patches to test visibility */
|
||||
|
||||
#define FF_GEOMETRY_ERROR (1.0) /* FF relative error due to Fdf approx
|
||||
and cosine approx of angle */
|
||||
#define FF_GEOMETRY_VARIANCE (1.0) /* FF relative variance within elem */
|
||||
#define FF_VISIBILITY_ERROR (1.0 / N_VISIBILITY_TEST_RAYS)
|
||||
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Intersection code
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
#define POINT_POSITIVE_SIDE (1)
|
||||
#define POINT_NEGATIVE_SIDE (2)
|
||||
#define POINT_ON_PLANE (0)
|
||||
|
||||
#define P1_POSITIVE (1)
|
||||
#define P1_NEGATIVE (2)
|
||||
#define P2_POSITIVE (4)
|
||||
#define P2_NEGATIVE (8)
|
||||
#define P3_POSITIVE (16)
|
||||
#define P3_NEGATIVE (32)
|
||||
#define ANY_POSITIVE (P1_POSITIVE | P2_POSITIVE | P3_POSITIVE)
|
||||
#define ANY_NEGATIVE (P1_NEGATIVE | P2_NEGATIVE | P3_NEGATIVE)
|
||||
#define POSITIVE_SIDE(code) (((code) & ANY_NEGATIVE) == 0)
|
||||
#define NEGATIVE_SIDE(code) (((code) & ANY_POSITIVE) == 0)
|
||||
#define INTERSECTING(code) ( ((code) & ANY_NEGATIVE) \
|
||||
&& ((code) & ANY_POSITIVE) )
|
||||
/* Pull the 2-bit side code (POINT_ON_PLANE / POINT_POSITIVE_SIDE /
   POINT_NEGATIVE_SIDE) of vertex P1, P2 or P3 out of a packed
   intersection code.  The argument is parenthesized so that an
   expression such as (a | b) is masked and shifted as a whole. */
#define P1_CODE(code) ((code) & 3)
#define P2_CODE(code) (((code) >> 2) & 3)
#define P3_CODE(code) (((code) >> 4) & 3)
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Visibility Testing
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
#define VISIBILITY_UNDEF ((float)-1.0)
|
||||
#define PATCH_CACHE_SIZE (2) /* The first two cache entries
|
||||
covers about 95% of the total cache hits, so using
|
||||
more doesn't help too much. */
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Refinement Advice
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
#define _NO_INTERACTION (1)
|
||||
#define _NO_REFINEMENT_NECESSARY (2)
|
||||
#define _REFINE_PATCH_1 (4)
|
||||
#define _REFINE_PATCH_2 (8)
|
||||
#define _NO_VISIBILITY_NECESSARY (16)
|
||||
|
||||
#define NO_INTERACTION(c) ((c) & _NO_INTERACTION)
|
||||
#define NO_REFINEMENT_NECESSARY(c) ((c) & _NO_REFINEMENT_NECESSARY)
|
||||
#define REFINE_PATCH_1(c) ((c) & _REFINE_PATCH_1)
|
||||
#define REFINE_PATCH_2(c) ((c) & _REFINE_PATCH_2)
|
||||
#define NO_VISIBILITY_NECESSARY(c) ((c) & _NO_VISIBILITY_NECESSARY)
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Element Vertex
|
||||
*
|
||||
* ElementVertex represents a vertex of an element. A vertex structure
|
||||
* is shared by those elements which contain the vertex as part of their
|
||||
* vertex list.
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
typedef struct _elemvertex {
|
||||
Vertex p ; /* Coordinate of the vertex */
|
||||
Rgb col ; /* Color of the vertex */
|
||||
float weight ; /* weight */
|
||||
Shared_Lock *ev_lock ;
|
||||
} ElemVertex ;
|
||||
|
||||
|
||||
#define N_ELEMVERTEX_ALLOCATE (16)
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Edge
|
||||
*
|
||||
* Edge represents each edge of the element. Two adjacent elements
|
||||
* share the same edge. As an element is subdivided, the edge is also
|
||||
* subdivided. The edges form a binary tree, which can be viewed as a
|
||||
* projection of the element subdivision along an edge of the element.
|
||||
* In other words, the edge structure binds elements at the same height.
|
||||
* Note that the vertices may appear in reverse order in the edge structure
|
||||
* with respect to the order in the patch/element definition.
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
typedef struct _edge {
|
||||
ElemVertex *pa, *pb ;
|
||||
struct _edge *ea, *eb ; /* Edge (A-center) and (center-B) */
|
||||
Shared_Lock *edge_lock ; /* Use segment0 */
|
||||
} Edge ;
|
||||
|
||||
|
||||
#define N_EDGE_ALLOCATE (16)
|
||||
|
||||
#define _LEAF_EDGE(e) ((e)->ea == 0)
|
||||
#define EDGE_REVERSE(e,a,b) ((e)->pa == (b))
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Planar equation
|
||||
*
|
||||
* Plane equation (in implicit form) of the triangle patch.
|
||||
* A point P on the plane satisfies
|
||||
* (N.P) + C = 0
|
||||
* where N is the normal vector of the patch, C is a constant which
|
||||
* is the distance of the plane from the origin scaled by -|N|.
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
typedef struct {
|
||||
Vertex n ; /* Normal vector (normalized) */
|
||||
float c ; /* Constant */
|
||||
/* Nx*x + Ny*y + Nz*z + C = 0 */
|
||||
} PlaneEqu ;
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Patch (also a node of the BSP tree)
|
||||
*
|
||||
* The Patch represents a triangular patch (input polygon) of the given
|
||||
* geometric model (i.e., room scene). The Patch contains 'per-patch'
|
||||
* information such as the plane equation, area, and color. The Patch also
|
||||
* serves as a node of the BSP tree which is used to test patch-patch
|
||||
* visibility. The Patch points to the root level of the element quad-tree.
|
||||
* Geometrically speaking, the Patch and the root represent the same
|
||||
* triangle.
|
||||
* Although coordinates of the vertices are given by the Edge structure,
|
||||
* copies are stored in the Patch to allow fast access to the coordinates
|
||||
* during the visibility test.
|
||||
* For cost based task distribution another structure, Patch_Cost, is
|
||||
* also used. This structure is made separate from the Patch structure
|
||||
* since gathering cost statistics is a frequently read/write operation.
|
||||
* If it were in the Patch structure, updating a cost would result in
|
||||
* invalidation of the Patch structure and cause cache misses during
|
||||
* BSP traversal.
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
struct _element ;
|
||||
|
||||
typedef struct _patch {
|
||||
ElemVertex *ev1, *ev2, *ev3 ; /* ElemVertices of the patch */
|
||||
Edge *e12, *e23, *e31 ; /* Edges of the patch */
|
||||
Vertex p1, p2, p3 ; /* Vertices of the patch */
|
||||
PlaneEqu plane_equ ; /* Plane equation H(x,y,z) */
|
||||
float area ; /* Area of the patch */
|
||||
Rgb color ; /* Diffuse color of the patch */
|
||||
/* (reflectance) */
|
||||
Rgb emittance ; /* Radiant emittance */
|
||||
|
||||
struct _patch *bsp_positive ; /* BSP tree H(x,y,z) >= 0 */
|
||||
struct _patch *bsp_negative ; /* H(x,y,z) < 0 */
|
||||
struct _patch *bsp_parent ; /* BSP backpointer to the parent*/
|
||||
|
||||
struct _element *el_root ; /* Root of the element tree */
|
||||
long seq_no ; /* Patch sequence number */
|
||||
} Patch ;
|
||||
|
||||
|
||||
typedef struct {
|
||||
Patch *patch ;
|
||||
Shared_Lock *cost_lock ; /* Cost variable lock */
|
||||
long n_bsp_node ; /* Number of BSP nodes visited */
|
||||
long n_total_inter ; /* Total number of interactions */
|
||||
long cost_estimate ; /* Cost estimate */
|
||||
long cost_history[11] ; /* Cost history */
|
||||
} Patch_Cost ;
|
||||
|
||||
/* Patch cost:
|
||||
Visiting a node in BSP tree: 150 cyc (overall)
|
||||
Gathering ray per interaction: 50 cyc (overall avg) */
|
||||
|
||||
#define PATCH_COST(p) ((p)->n_bsp_node * 3 + (p)->n_total_inter)
|
||||
#define PATCH_COST_ESTIMATE(p) ((p)->cost_history[0] \
|
||||
+ ((p)->cost_history[1] >> 1)\
|
||||
+ ((p)->cost_history[2] >> 2) )
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Element
|
||||
*
|
||||
* The Element represents each node of the quad-tree generated by the
|
||||
* hierarchical subdivision. The Element structure consists of:
|
||||
* - pointers to maintain the tree structure
|
||||
* - a linear list of interacting elements
|
||||
* - radiosity value of the element
|
||||
* - pointer to the vertex and edge data structures
|
||||
*
|
||||
* To allow smooth radiosity interpolation across elements, an element
|
||||
* shares edges and vertices with adjacent elements.
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
struct _interact ;
|
||||
|
||||
typedef struct _element {
|
||||
Shared_Lock *elem_lock ; /* Element lock variable (seg 1) */
|
||||
Patch *patch ; /* Original patch of the element */
|
||||
|
||||
struct _element *parent ; /* Quad tree (parent) */
|
||||
struct _element *center ; /* (center triangle) */
|
||||
struct _element *top ; /* (top) */
|
||||
struct _element *left ; /* (left) */
|
||||
struct _element *right ; /* (right) */
|
||||
|
||||
struct _interact *interactions ; /* Top of light interaction list */
|
||||
long n_interactions ; /* Total # of interactions */
|
||||
struct _interact *vis_undef_inter ; /* Top of visibility undef list */
|
||||
long n_vis_undef_inter ; /* # of interactions whose visibility
|
||||
is not yet calculated */
|
||||
Rgb rad ; /* Radiosity of this element
|
||||
(new guess of B) */
|
||||
Rgb rad_in ; /* Sum of ancestor's radiosity */
|
||||
Rgb rad_subtree ; /* Area weighted sum of subtree's
|
||||
radiosity (includes this elem) */
|
||||
long join_counter ; /* # of unfinished subprocesses */
|
||||
|
||||
ElemVertex *ev1, *ev2, *ev3 ; /* Vertices of the element */
|
||||
Edge *e12, *e23, *e31 ; /* Edges of the element */
|
||||
float area ; /* Area of the element */
|
||||
} Element ;
|
||||
|
||||
|
||||
#define _LEAF_ELEMENT(e) ((e)->center == 0)
|
||||
|
||||
#if MEM_CONSISTENCY_PROCESSOR
|
||||
#define LEAF_ELEMENT(e) _LEAF_ELEMENT((e))
|
||||
#endif
|
||||
|
||||
#if (MEM_CONSISTENCY_RELEASE || MEM_CONSISTENCY_WEAK)
|
||||
extern long leaf_element() ;
|
||||
#define LEAF_ELEMENT(e) (leaf_element((e)))
|
||||
#endif
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Interaction
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
typedef struct _interact {
|
||||
struct _interact *next ; /* Next entry of the list */
|
||||
Element *destination ; /* Partner of the interaction */
|
||||
float formfactor_out ; /* Form factor from this patch */
|
||||
float formfactor_err ; /* Error of FF */
|
||||
float area_ratio ; /* Area(this) / Area(dest) */
|
||||
float visibility ; /* Visibility (0 - 1.0) */
|
||||
} Interaction ;
|
||||
|
||||
|
||||
void foreach_patch_in_bsp(void (*func)(), long arg1, long process_id);
|
||||
void foreach_depth_sorted_patch(Vertex *sort_vec, void (*func)(), long arg1, long process_id);
|
||||
void define_patch(Patch *patch, Patch *root, long process_id);
|
||||
void split_patch(Patch *patch, Patch *node, long xing_code, long process_id);
|
||||
void attach_element(Patch *patch, long process_id);
|
||||
void refine_newpatch(Patch *patch, long newpatch, long process_id);
|
||||
Patch *get_patch(long process_id);
|
||||
void init_patchlist(long process_id);
|
||||
void print_patch(Patch *patch, long process_id);
|
||||
void print_bsp_tree(long process_id);
|
||||
void _pr_patch(Patch *patch, long dummy, long process_id);
|
||||
float plane_equ(PlaneEqu *plane, Vertex *point, long process_id);
|
||||
float comp_plane_equ(PlaneEqu *pln, Vertex *p1, Vertex *p2, Vertex *p3, long process_id);
|
||||
long point_intersection(PlaneEqu *plane, Vertex *point, long process_id);
|
||||
long patch_intersection(PlaneEqu *plane, Vertex *p1, Vertex *p2, Vertex *p3, long process_id);
|
||||
void print_plane_equ(PlaneEqu *peq, long process_id);
|
||||
|
||||
#endif
|
749
splash2/codes/apps/radiosity/patchman.C
Normal file
749
splash2/codes/apps/radiosity/patchman.C
Normal file
|
@ -0,0 +1,749 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Patch management.
|
||||
*
|
||||
* This module has the following functions:
|
||||
* (1) Create/initialize a new instance of the patch object.
|
||||
* (2) Management of BSP tree (insertion,traversal)
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
EXTERN_ENV;
|
||||
|
||||
include(radiosity.h)
|
||||
|
||||
static void _foreach_patch(Patch *node, void (*func)(), long arg1, long process_id);
|
||||
static void _foreach_d_s_patch(Vertex *svec, Patch *node, void (*func)(), long arg1, long process_id);
|
||||
static void split_into_3(Patch *patch, ElemVertex *ev1, ElemVertex *ev2, ElemVertex *ev3, Edge *e12, Edge *e23, Edge *e31, Patch *parent, long process_id);
|
||||
static void split_into_2(Patch *patch, ElemVertex *ev1, ElemVertex *ev2, ElemVertex *ev3, Edge *e12, Edge *e23, Edge *e31, Patch *parent, long process_id);
|
||||
|
||||
/***************************************************************************
|
||||
****************************************************************************
|
||||
*
|
||||
* Methods of Patch object
|
||||
*
|
||||
****************************************************************************
|
||||
****************************************************************************
|
||||
****************************************************************************
|
||||
*
|
||||
* foreach_patch_in_bsp()
|
||||
*
|
||||
* General purpose driver. For each patch in the BSP tree, apply specified
|
||||
* function. The function is passed a pointer to the patch.
|
||||
* Traversal is in-order, that is, subtree(-) -> node -> subtree(+).
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
void foreach_patch_in_bsp(void (*func)(), long arg1, long process_id)
|
||||
{
|
||||
_foreach_patch( global->bsp_root, func, arg1, process_id ) ;
|
||||
}
|
||||
|
||||
|
||||
/*
 * In-order walk of the BSP subtree rooted at `node`:
 * subtree(-) first, then the node itself, then subtree(+).
 *
 * Only the negative side recurses; the positive side is followed by the
 * loop.  A null subtree makes the recursive call return immediately, so
 * the visiting order is the same as testing each child before descending.
 */
static void _foreach_patch(Patch *node, void (*func)(), long arg1, long process_id)
{
    Patch *cur;

    for (cur = node; cur != 0; cur = cur->bsp_positive)
    {
        /* Everything on the negative side comes before this node */
        _foreach_patch(cur->bsp_negative, func, arg1, process_id);

        /* Visit the node; the positive link is read only afterwards */
        func(cur, arg1, process_id);
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* foreach_depth_sorted_patch()
|
||||
*
|
||||
* For each patch in the BSP tree, apply specified function. In the depth
|
||||
* sorted order along the given vector (from tail to arrow head of the
|
||||
* vector).
|
||||
* The function is passed a pointer to the patch.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Public entry point of the depth-sorted walk: visit every patch of the
 * global BSP tree in the order determined by `sort_vec`, calling
 * func(patch, arg1, process_id) on each.
 */
void foreach_depth_sorted_patch(Vertex *sort_vec, void (*func)(), long arg1, long process_id)
{
    Patch *bsp_top = global->bsp_root;

    _foreach_d_s_patch(sort_vec, bsp_top, func, arg1, process_id);
}
|
||||
|
||||
|
||||
/*
 * Depth-sorted walk of the BSP subtree rooted at `node`.
 *
 * The sign of <svec, plane normal of node> selects which child is visited
 * before the node and which after it:
 *   inner product >= 0 : subtree(-), node, subtree(+)
 *   inner product <  0 : subtree(+), node, subtree(-)
 */
static void _foreach_d_s_patch(Vertex *svec, Patch *node, void (*func)(), long arg1, long process_id)
{
    float dot;
    long  negative_first;
    Patch *child;

    if (node == 0)
        return;

    dot = inner_product(svec, &node->plane_equ.n);
    negative_first = (dot >= 0.0);

    /* Side the vector is approaching from */
    child = negative_first ? node->bsp_negative : node->bsp_positive;
    if (child)
        _foreach_d_s_patch(svec, child, func, arg1, process_id);

    /* The node itself */
    func(node, arg1, process_id);

    /* Opposite side (link is read only after func has run) */
    child = negative_first ? node->bsp_positive : node->bsp_negative;
    if (child)
        _foreach_d_s_patch(svec, child, func, arg1, process_id);
}
|
||||
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* define_patch()
|
||||
*
|
||||
* Insert a new patch in the BSP tree and put refinement task in the queue.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Link `patch` into the BSP tree, starting the descent at `root`
 * (or at the global root when `root` is null).
 *
 * The tree lock is held during the descent and released before any
 * follow-up work:
 *   - very first patch      : becomes the global root, gets its root
 *                             element, nothing else is done;
 *   - free leaf slot found  : patch is linked there, gets its root
 *                             element, and refine_newpatch() is then
 *                             applied to every patch in the tree with the
 *                             new patch as argument;
 *   - neither side matches  : the patch is handed to split_patch(), which
 *                             takes care of inserting the pieces.
 */
void define_patch(Patch *patch, Patch *root, long process_id)
{
    Patch  *cur = root;
    Patch **slot;
    long    code;

    LOCK(global->bsp_tree_lock);

    if (cur == 0)
    {
        if (global->bsp_root == 0)
        {
            /* Tree is empty: this patch becomes the root */
            global->bsp_root    = patch;
            patch->bsp_positive = 0;
            patch->bsp_negative = 0;
            patch->bsp_parent   = 0;
            attach_element(patch, process_id);
            UNLOCK(global->bsp_tree_lock);

            return;
        }

        /* The root was null when the task was created but has been set
           in the meantime; start from it. */
        cur = global->bsp_root;
    }

    for (;;)
    {
        /* Classify the three vertices against the plane of `cur` */
        code = patch_intersection(&cur->plane_equ, &patch->p1,
                                  &patch->p2, &patch->p3, process_id);

        if (POSITIVE_SIDE(code))
            slot = &cur->bsp_positive;
        else if (NEGATIVE_SIDE(code))
            slot = &cur->bsp_negative;
        else
        {
            /* Patch lies on neither side as a whole: split it.
               Insertion of the pieces is done by split_patch(). */
            UNLOCK(global->bsp_tree_lock);
            split_patch(patch, cur, code, process_id);
            return;
        }

        if (*slot != 0)
        {
            /* Occupied: keep descending on that side */
            cur = *slot;
            continue;
        }

        /* Free leaf position: link the patch here */
        *slot = patch;
        patch->bsp_parent = cur;
        attach_element(patch, process_id);
        UNLOCK(global->bsp_tree_lock);

        foreach_patch_in_bsp(refine_newpatch, (long)patch, process_id);
        return;
    }
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* split_patch()
|
||||
* split_into_3()
|
||||
* split_into_2()
|
||||
*
|
||||
* Split a patch and insert in the BSP tree.
|
||||
* split_into_3() Split a patch into 3 patches. The routine assumes both
|
||||
* P1-P2 and P1-P3 intersect the plane.
|
||||
* split_into_2() Split a patch into 2 patches. The routine assumes P1 is
|
||||
* on the plane and P2-P3 intersects the plane.
|
||||
* split_patch() Classify intersection type, rename vertices, and
|
||||
* call split_into_X().
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Decide how `patch` must be cut by the plane of `node` and call the
 * matching splitter with the vertices/edges rotated so that the
 * distinguished vertex comes first.
 *
 * xing_code holds one per-vertex code for each of P1, P2, P3.
 *   - two vertices share a code : the third one is the apex -> split_into_3
 *   - all three codes differ    : the vertex whose code is POINT_ON_PLANE
 *                                 leads -> split_into_2 (P3 is taken as the
 *                                 leading vertex when neither P1 nor P2 is
 *                                 on the plane)
 */
void split_patch(Patch *patch, Patch *node, long xing_code, long process_id)
{
    ElemVertex *vtx[3];
    Edge       *edg[3];
    long c1, c2, c3;
    long lead;        /* index (0..2) of the vertex passed first     */
    long three_way;   /* non-zero: split_into_3, zero: split_into_2  */
    long i1, i2;

    c1 = P1_CODE(xing_code);
    c2 = P2_CODE(xing_code);
    c3 = P3_CODE(xing_code);

    if (c1 == c2)
    {
        lead = 2;  three_way = 1;     /* P3 is on the opposite side */
    }
    else if (c1 == c3)
    {
        lead = 1;  three_way = 1;     /* P2 is on the opposite side */
    }
    else if (c2 == c3)
    {
        lead = 0;  three_way = 1;     /* P1 is on the opposite side */
    }
    else if (c1 == POINT_ON_PLANE)
    {
        lead = 0;  three_way = 0;     /* P1 on the plane, P2/P3 on opposite sides */
    }
    else if (c2 == POINT_ON_PLANE)
    {
        lead = 1;  three_way = 0;     /* P2 on the plane, P3/P1 on opposite sides */
    }
    else
    {
        lead = 2;  three_way = 0;     /* P3 on the plane, P1/P2 on opposite sides */
    }

    vtx[0] = patch->ev1;  vtx[1] = patch->ev2;  vtx[2] = patch->ev3;
    edg[0] = patch->e12;  edg[1] = patch->e23;  edg[2] = patch->e31;

    /* Cyclic rotation: lead, lead+1, lead+2 (mod 3) */
    i1 = (lead + 1) % 3;
    i2 = (lead + 2) % 3;

    if (three_way)
        split_into_3(patch, vtx[lead], vtx[i1], vtx[i2],
                     edg[lead], edg[i1], edg[i2], node, process_id);
    else
        split_into_2(patch, vtx[lead], vtx[i1], vtx[i2],
                     edg[lead], edg[i1], edg[i2], node, process_id);
}
|
||||
|
||||
|
||||
|
||||
/*
 * Cut `patch` into three triangles along the plane of `parent` and insert
 * each of them below `parent`.  The caller passes the vertices rotated so
 * that both P1-P2 and P1-P3 cross the plane.
 *
 *   Pa = crossing point on P1-P2,  Pb = crossing point on P1-P3
 *   (1) P1-Pa-Pb   new patch
 *   (2) Pa-P2-P3   new patch
 *   (3) Pa-P3-Pb   the original patch object, reused
 *
 * Each piece receives a share of the original area computed from the
 * parametric crossing positions u2 and u3.
 */
static void split_into_3(Patch *patch, ElemVertex *ev1, ElemVertex *ev2, ElemVertex *ev3, Edge *e12, Edge *e23, Edge *e31, Patch *parent, long process_id)
{
    ElemVertex *ev_a;                 /* Crossing point on P1-P2 */
    ElemVertex *ev_b;                 /* Crossing point on P1-P3 */
    Edge  *edge_ab, *edge_3a;
    Patch *tri;
    float h1, h2, h3;
    float u2, u3;
    long  e12_flipped, e31_flipped;

    /* Plane equation values at the three vertices */
    h1 = plane_equ(&parent->plane_equ, &ev1->p, process_id);
    h2 = plane_equ(&parent->plane_equ, &ev2->p, process_id);
    h3 = plane_equ(&parent->plane_equ, &ev3->p, process_id);

    /* Crossing on P1-P2, as a parametric distance from P1.
       The original authors note that P1-P2 and P1-P3 are at least
       2*F_COPLANAR long, so the divisions are not guarded. */
    u2 = h1 / (h1 - h2);
    e12_flipped = EDGE_REVERSE(e12, ev1, ev2);
    subdivide_edge(e12, e12_flipped ? u2 : (float)1.0 - u2, process_id);
    ev_a = e12->ea->pb;

    /* Crossing on P1-P3 */
    u3 = h1 / (h1 - h3);
    e31_flipped = EDGE_REVERSE(e31, ev3, ev1);
    subdivide_edge(e31, e31_flipped ? (float)1.0 - u3 : u3, process_id);
    ev_b = e31->ea->pb;

    /* (1) P1-Pa-Pb */
    tri = get_patch(process_id);

    tri->p1 = ev1->p;
    tri->p2 = ev_a->p;
    tri->p3 = ev_b->p;

    tri->ev1 = ev1;
    tri->ev2 = e12->ea->pb;
    tri->ev3 = e31->ea->pb;

    tri->e12 = e12_flipped ? e12->eb : e12->ea;
    tri->e23 = edge_ab = create_edge(ev_a, ev_b, process_id);
    tri->e31 = e31_flipped ? e31->ea : e31->eb;

    tri->plane_equ = patch->plane_equ;
    tri->area      = u2 * u3 * patch->area;
    tri->color     = patch->color;
    tri->emittance = patch->emittance;
    define_patch(tri, parent, process_id);

    /* (2) Pa-P2-P3 */
    tri = get_patch(process_id);

    tri->p1 = ev_a->p;
    tri->p2 = ev2->p;
    tri->p3 = ev3->p;

    tri->ev1 = ev_a;
    tri->ev2 = ev2;
    tri->ev3 = ev3;

    tri->e12 = e12_flipped ? e12->ea : e12->eb;
    tri->e23 = e23;
    tri->e31 = edge_3a = create_edge(ev3, ev_a, process_id);

    tri->plane_equ = patch->plane_equ;
    tri->area      = (1.0 - u2) * patch->area;
    tri->color     = patch->color;
    tri->emittance = patch->emittance;
    define_patch(tri, parent, process_id);

    /* (3) Pa-P3-Pb: overwrite the original patch in place.  Its plane
       equation, color and emittance are left as they are. */
    patch->p1 = ev_a->p;
    patch->p2 = ev3->p;
    patch->p3 = ev_b->p;

    patch->ev1 = ev_a;
    patch->ev2 = ev3;
    patch->ev3 = ev_b;

    patch->e12 = edge_3a;
    patch->e23 = e31_flipped ? e31->eb : e31->ea;
    patch->e31 = edge_ab;

    patch->area = u2 * (1.0 - u3) * patch->area;
    define_patch(patch, parent, process_id);
}
|
||||
|
||||
|
||||
/*
 * Cut `patch` into two triangles along the plane of `parent` and insert
 * both below `parent`.  The caller passes the vertices rotated so that P1
 * is the vertex on the plane and P2-P3 is the edge that crosses it.
 *
 *   Pa = crossing point on P2-P3
 *   (1) P1-P2-Pa   new patch
 *   (2) P1-Pa-P3   the original patch object, reused
 */
static void split_into_2(Patch *patch, ElemVertex *ev1, ElemVertex *ev2, ElemVertex *ev3, Edge *e12, Edge *e23, Edge *e31, Patch *parent, long process_id)
{
    ElemVertex *ev_a;                 /* Crossing point on P2-P3 */
    Edge  *edge_a1;
    Patch *tri;
    float h2, h3;
    float u2;
    long  e23_flipped;

    /* Plane equation values at the two off-plane vertices */
    h2 = plane_equ(&parent->plane_equ, &ev2->p, process_id);
    h3 = plane_equ(&parent->plane_equ, &ev3->p, process_id);

    /* Crossing on P2-P3, as a parametric distance from P2.
       The original authors note that P2-P3 is at least 2*F_COPLANAR long,
       so the division is not guarded. */
    u2 = h2 / (h2 - h3);
    e23_flipped = EDGE_REVERSE(e23, ev2, ev3);
    subdivide_edge(e23, e23_flipped ? u2 : (float)1.0 - u2, process_id);
    ev_a = e23->ea->pb;

    /* (1) P1-P2-Pa */
    tri = get_patch(process_id);

    tri->p1 = ev1->p;
    tri->p2 = ev2->p;
    tri->p3 = ev_a->p;

    tri->ev1 = ev1;
    tri->ev2 = ev2;
    tri->ev3 = ev_a;

    tri->e12 = e12;
    tri->e23 = e23_flipped ? e23->eb : e23->ea;
    tri->e31 = edge_a1 = create_edge(ev_a, ev1, process_id);

    tri->plane_equ = patch->plane_equ;
    tri->area      = u2 * patch->area;
    tri->color     = patch->color;
    tri->emittance = patch->emittance;
    define_patch(tri, parent, process_id);

    /* (2) P1-Pa-P3: overwrite the original patch in place.  Its plane
       equation, color and emittance are left as they are. */
    patch->p1 = ev1->p;
    patch->p2 = ev_a->p;
    patch->p3 = ev3->p;

    patch->ev1 = ev1;
    patch->ev2 = ev_a;
    patch->ev3 = ev3;

    patch->e12 = edge_a1;
    patch->e23 = e23_flipped ? e23->ea : e23->eb;
    patch->e31 = e31;

    patch->area = (1.0 - u2) * patch->area;
    define_patch(patch, parent, process_id);
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* attach_element()
|
||||
*
|
||||
* Attach an element to the patch. This element becomes the
|
||||
* root of the quad tree.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Give `patch` its root element: obtain a fresh element, hang it on
 * patch->el_root, and copy the patch's vertices, edges and area into it.
 * The element's radiosity starts out as the patch's emittance.
 */
void attach_element(Patch *patch, long process_id)
{
    Element *root_elem = get_element(process_id);

    /* Two-way link between patch and element */
    patch->el_root   = root_elem;
    root_elem->patch = patch;

    /* Geometry is shared with the patch */
    root_elem->ev1 = patch->ev1;
    root_elem->ev2 = patch->ev2;
    root_elem->ev3 = patch->ev3;

    root_elem->e12 = patch->e12;
    root_elem->e23 = patch->e23;
    root_elem->e31 = patch->e31;

    root_elem->area = patch->area;
    root_elem->rad  = patch->emittance;
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* refine_newpatch()
|
||||
*
|
||||
* Recursively subdivide
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
/*
 * Per-patch callback used right after a patch has been inserted.
 * `newpatch` is the new Patch pointer passed through a long.
 *
 * A form-factor refinement task between the root elements of `patch` and
 * the new patch is created only when
 *   - patch->seq_no is lower than the new patch's seq_no,
 *   - the new patch is not classified NEGATIVE_SIDE against the plane of
 *     `patch`, and
 *   - `patch` is not classified NEGATIVE_SIDE against the plane of the
 *     new patch.
 */
void refine_newpatch(Patch *patch, long newpatch, long process_id)
{
    Patch *fresh = (Patch *)newpatch;
    long   side;

    /* Skip pairs whose sequence numbers are not in the expected order
       (the original code attributes this to multiprocessing races) */
    if (patch->seq_no < fresh->seq_no)
    {
        /* New patch as seen from this patch's plane */
        side = patch_intersection(&patch->plane_equ,
                                  &fresh->p1, &fresh->p2, &fresh->p3, process_id);
        if (!NEGATIVE_SIDE(side))
        {
            /* This patch as seen from the new patch's plane */
            side = patch_intersection(&fresh->plane_equ,
                                      &patch->p1, &patch->p2, &patch->p3, process_id);
            if (!NEGATIVE_SIDE(side))
            {
                /* Create a new task or do it by itself */
                create_ff_refine_task(patch->el_root, fresh->el_root, 0, process_id);
            }
        }
    }
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* get_patch()
|
||||
*
|
||||
* Returns a new instance of the Patch object.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
Patch *get_patch(long process_id)
|
||||
{
|
||||
Patch *p ;
|
||||
|
||||
/* LOCK the free list */
|
||||
LOCK(global->free_patch_lock);
|
||||
|
||||
/* Test pointer */
|
||||
if( global->free_patch == 0 )
|
||||
{
|
||||
printf( "Fatal: Ran out of patch buffer\n" ) ;
|
||||
UNLOCK(global->free_patch_lock);
|
||||
exit( 1 ) ;
|
||||
}
|
||||
|
||||
/* Get a patch data structure */
|
||||
p = global->free_patch ;
|
||||
global->free_patch = p->bsp_positive ;
|
||||
global->n_total_patches++ ;
|
||||
global->n_free_patches-- ;
|
||||
|
||||
/* Unlock the list */
|
||||
UNLOCK(global->free_patch_lock);
|
||||
|
||||
/* Clear pointers just in case.. */
|
||||
p->el_root = 0 ;
|
||||
p->bsp_positive = 0 ;
|
||||
p->bsp_negative = 0 ;
|
||||
p->bsp_parent = 0 ;
|
||||
|
||||
return( p ) ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* init_patchlist()
|
||||
*
|
||||
* Initialize patch free list.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
void init_patchlist(long process_id)
|
||||
{
|
||||
long i ;
|
||||
|
||||
/* Initialize Patch free list */
|
||||
for( i = 0 ; i < MAX_PATCHES-1 ; i++ )
|
||||
{
|
||||
global->patch_buf[i].bsp_positive = &global->patch_buf[i+1] ;
|
||||
global->patch_buf[i].seq_no = i ;
|
||||
}
|
||||
global->patch_buf[ MAX_PATCHES-1 ].bsp_positive = 0 ;
|
||||
global->patch_buf[ MAX_PATCHES-1 ].seq_no = MAX_PATCHES - 1 ;
|
||||
|
||||
global->free_patch = global->patch_buf ;
|
||||
global->n_total_patches = 0 ;
|
||||
global->n_free_patches = MAX_PATCHES ;
|
||||
LOCKINIT(global->free_patch_lock) ;
|
||||
|
||||
#if PATCH_ASSIGNMENT == PATCH_ASSIGNMENT_COSTBASED
|
||||
/* Initialize Patch_Cost structure */
|
||||
for( i = 0 ; i < MAX_PATCHES ; i++ )
|
||||
{
|
||||
global->patch_cost[i].patch = &global->patch_buf[i] ;
|
||||
global->patch_cost[i].cost_lock
|
||||
= get_sharedlock( SHARED_LOCK_SEGANY, process_id ) ;
|
||||
global->patch_cost[i].n_bsp_node = 0 ;
|
||||
global->patch_cost[i].n_total_inter = 0 ;
|
||||
global->patch_cost[i].cost_estimate = 0 ;
|
||||
global->patch_cost[i].cost_history[0] = 0 ;
|
||||
global->patch_cost[i].cost_history[1] = 0 ;
|
||||
global->patch_cost[i].cost_history[2] = 0 ;
|
||||
global->patch_cost[i].cost_history[3] = 0 ;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* print_patch()
|
||||
*
|
||||
* Print patch information.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Dump one patch to stdout: its address (printed as a long), the three
 * corner points, the plane equation and the area.
 */
void print_patch(Patch *patch, long process_id)
{
    long patch_addr = (long)patch;

    printf("Patch (%ld)\n", patch_addr);

    /* Corner points P1..P3 */
    print_point(&patch->p1);
    print_point(&patch->p2);
    print_point(&patch->p3);

    print_plane_equ(&patch->plane_equ, process_id);
    printf("\tArea %f\n", patch->area);
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* print_bsp_tree()
|
||||
*
|
||||
* Print BSP tree
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
void print_bsp_tree(long process_id)
|
||||
{
|
||||
printf( "**** BSP TREE ***\n" ) ;
|
||||
foreach_patch_in_bsp( _pr_patch, 0, process_id ) ;
|
||||
printf( "\n\n" ) ;
|
||||
}
|
||||
|
||||
void _pr_patch(Patch *patch, long dummy, long process_id)
|
||||
{
|
||||
print_patch( patch, process_id ) ;
|
||||
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
****************************************************************************
|
||||
*
|
||||
* Methods for PlaneEqu object
|
||||
*
|
||||
****************************************************************************
|
||||
****************************************************************************
|
||||
****************************************************************************
|
||||
*
|
||||
*
|
||||
* plane_equ()
|
||||
*
|
||||
* Returns the value H(Px, Py, Pz) where H is the equation of the plane
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Evaluate the plane equation at `point`:
 *     H = c + x*n.x + y*n.y + z*n.z
 * `process_id` is not used.
 */
float plane_equ(PlaneEqu *plane, Vertex *point, long process_id)
{
    float value;

    /* Terms are summed in this exact order to keep rounding unchanged */
    value = plane->c + point->x * plane->n.x
                     + point->y * plane->n.y
                     + point->z * plane->n.z;

    return value;
}
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* comp_plane_equ()
|
||||
*
|
||||
* Compute plane equation from the three vertices on the plane.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Fill in `pln` from three points: the normal comes from plane_normal(),
 * and the constant term is minus the inner product of that normal with
 * p1.  The value returned by plane_normal() is passed back to the caller.
 * `process_id` is not used.
 */
float comp_plane_equ(PlaneEqu *pln, Vertex *p1, Vertex *p2, Vertex *p3, long process_id)
{
    float normal_len;

    /* Normal vector first; the constant term depends on it */
    normal_len = plane_normal(&pln->n, p1, p2, p3);

    pln->c = -inner_product(&pln->n, p1);

    return normal_len;
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* point_intersection()
|
||||
* patch_intersection()
|
||||
*
|
||||
* Returns the intersection code according to the relationship between the
|
||||
* point/patch and the plane.
|
||||
* point_intersection() returns 2 bits code that represents:
|
||||
* 01: Point is on the positive side (H(x,y,z) > 0)
|
||||
* 10: Point is on the negative side (H(x,y,z) < 0)
|
||||
* 00: Point is on the plane (H(x,y,z) = 0)
|
||||
*
|
||||
* patch_intersection() returns 3 sets of 2 bits code each represents the
|
||||
* relationship of each vertex of the triangle patch.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Classify `point` against `plane` with tolerance F_COPLANAR.
 * The result has POINT_POSITIVE_SIDE set when H > F_COPLANAR,
 * POINT_NEGATIVE_SIDE set when H < -F_COPLANAR, and is 0 otherwise.
 */
long point_intersection(PlaneEqu *plane, Vertex *point, long process_id)
{
    long  code = 0;
    float h;

    h = plane_equ(plane, point, process_id);

    /* The two tests are independent of each other */
    if (h > F_COPLANAR)
        code |= POINT_POSITIVE_SIDE;
    if (h < -F_COPLANAR)
        code |= POINT_NEGATIVE_SIDE;

    return code;
}
|
||||
|
||||
|
||||
|
||||
/*
 * Classify the three corners of a triangle against `plane` and pack the
 * per-point codes into one value: p1 in bits 0-1, p2 shifted left by 2,
 * p3 shifted left by 4.
 */
long patch_intersection(PlaneEqu *plane, Vertex *p1, Vertex *p2, Vertex *p3, long process_id)
{
    Vertex *corner[3];
    long packed = 0;
    long k;

    corner[0] = p1;
    corner[1] = p2;
    corner[2] = p3;

    /* Corners are classified in the order p1, p2, p3 */
    for (k = 0; k < 3; k++)
        packed |= point_intersection(plane, corner[k], process_id) << (2 * k);

    return packed;
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* print_plane_equ()
|
||||
*
|
||||
* Print plane equation
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Print the four coefficients of a plane equation (normal x, y, z and
 * the constant term) on one line of stdout.  `process_id` is not used.
 */
void print_plane_equ(PlaneEqu *peq, long process_id)
{
    printf("\tPLN: %.3f x + %.3f y + %.3f z + %.3f\n",
           peq->n.x,
           peq->n.y,
           peq->n.z,
           peq->c);
}
|
||||
|
||||
|
1188
splash2/codes/apps/radiosity/rad_main.C
Normal file
1188
splash2/codes/apps/radiosity/rad_main.C
Normal file
File diff suppressed because it is too large
Load diff
536
splash2/codes/apps/radiosity/rad_tools.C
Normal file
536
splash2/codes/apps/radiosity/rad_tools.C
Normal file
|
@ -0,0 +1,536 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/**************************************************************
|
||||
*
|
||||
* Utility package
|
||||
*
|
||||
***************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
EXTERN_ENV;
|
||||
|
||||
include(radiosity.h)
|
||||
|
||||
static void clear_element_radiosity(Element *elem, long dummy, long process_id);
|
||||
|
||||
/***************************************
|
||||
*
|
||||
* Global variables
|
||||
*
|
||||
****************************************/
|
||||
|
||||
#define MAX_INTERACTION_PER_ELEMENT (100)
|
||||
|
||||
long total_patches ;
|
||||
long total_elements ;
|
||||
long total_equiv_elements ;
|
||||
long total_interactions ;
|
||||
long total_comp_visible_interactions ;
|
||||
long total_invisible_interactions ;
|
||||
long total_match3, total_match2, total_match1, total_match0 ;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
long count ;
|
||||
float area ;
|
||||
} Elem_Interaction ;
|
||||
|
||||
Elem_Interaction elem_interaction[MAX_INTERACTION_PER_ELEMENT+1] ;
|
||||
Elem_Interaction many_interaction ;
|
||||
|
||||
|
||||
/***************************************
|
||||
*
|
||||
* Print statistics
|
||||
*
|
||||
****************************************/
|
||||
|
||||
void print_statistics(FILE *fd, long process_id)
|
||||
{
|
||||
long i ;
|
||||
|
||||
/* Initialize information */
|
||||
total_patches = 0 ;
|
||||
total_elements = 0 ;
|
||||
total_equiv_elements = 0 ;
|
||||
total_interactions = 0 ;
|
||||
total_comp_visible_interactions = 0 ;
|
||||
total_invisible_interactions = 0 ;
|
||||
total_match3 = 0 ;
|
||||
total_match2 = 0 ;
|
||||
total_match1 = 0 ;
|
||||
total_match0 = 0 ;
|
||||
|
||||
for( i = 0 ; i < MAX_INTERACTION_PER_ELEMENT ; i++ )
|
||||
{
|
||||
elem_interaction[i].count = 0 ;
|
||||
elem_interaction[i].area = 0 ;
|
||||
}
|
||||
many_interaction.count = 0 ;
|
||||
many_interaction.area = 0 ;
|
||||
|
||||
foreach_patch_in_bsp( get_patch_stat, 0, 0 ) ;
|
||||
|
||||
fprintf( fd, "Rasiosity Statistics\n\n" ) ;
|
||||
|
||||
fprintf( fd, " Histogram of interactions/elem\n" ) ;
|
||||
fprintf( fd, "\t Interactions Occurrence\n" ) ;
|
||||
fprintf( fd, "\t -------------------------------\n" ) ;
|
||||
if( many_interaction.count > 0 )
|
||||
{
|
||||
fprintf( fd, "\t (Over %d) %ld (%f)\n",
|
||||
MAX_INTERACTION_PER_ELEMENT,
|
||||
many_interaction.count,
|
||||
many_interaction.area / many_interaction.count ) ;
|
||||
}
|
||||
for( i = MAX_INTERACTION_PER_ELEMENT ;
|
||||
elem_interaction[i].count == 0 ; i-- ) ;
|
||||
for( ; i >= 0 ; i-- )
|
||||
{
|
||||
if( elem_interaction[i].count == 0 )
|
||||
continue ;
|
||||
|
||||
if( elem_interaction[i].count == 0 )
|
||||
fprintf( fd, "\t %ld %ld (---)\n",
|
||||
i, elem_interaction[i].count ) ;
|
||||
|
||||
else
|
||||
fprintf( fd, "\t %ld %ld (%f)\n",
|
||||
i, elem_interaction[i].count,
|
||||
elem_interaction[i].area / elem_interaction[i].count);
|
||||
}
|
||||
|
||||
fprintf( fd, " Configurations\n" ) ;
|
||||
#if PATCH_ASSIGNMENT == PATCH_ASSIGNMENT_COSTBASED
|
||||
fprintf( fd, "\tPatch assignment: Costbased\n" ) ;
|
||||
fprintf( fd, "\tUsing non-greedy cost-based algorithm\n") ;
|
||||
#endif
|
||||
#if PATCH_ASSIGNMENT == PATCH_ASSIGNMENT_STATIC
|
||||
fprintf( fd, "\tPatch assignment: Static equal number\n" ) ;
|
||||
#endif
|
||||
|
||||
fprintf( fd, "\tAlways inserting at top of list for visibility testing (not sorted)\n" ) ;
|
||||
fprintf( fd, "\tRecursive pruning enabled for BSP tree traversal\n" ) ;
|
||||
fprintf( fd, "\tPatch cache: Enabled\n" ) ;
|
||||
fprintf( fd, "\tAlways check all other queues when task stealing (not neighbor scheme)\n" ) ;
|
||||
|
||||
|
||||
fprintf( fd, " Parameters\n" ) ;
|
||||
fprintf( fd, "\tNumber of processors: %ld\n", n_processors ) ;
|
||||
fprintf( fd, "\tNumber of task queues: %ld\n", n_taskqueues ) ;
|
||||
fprintf( fd, "\tNumber of tasks / queue: %ld\n", n_tasks_per_queue ) ;
|
||||
fprintf( fd, "\tArea epsilon: %f\n", Area_epsilon ) ;
|
||||
fprintf( fd, "\t#inter parallel refine: %ld\n",
|
||||
N_inter_parallel_bf_refine);
|
||||
fprintf( fd, "\t#visibility comp / task: %ld\n", N_visibility_per_task ) ;
|
||||
fprintf( fd, "\tBF epsilon: %f\n", BFepsilon ) ;
|
||||
fprintf( fd, "\tEnergy convergence: %f\n", Energy_epsilon ) ;
|
||||
|
||||
fprintf( fd, " Iterations to converge: %ld times\n",
|
||||
global->iteration_count ) ;
|
||||
|
||||
fprintf( fd, " Resource Usage\n" ) ;
|
||||
fprintf( fd, "\tNumber of patches: %ld\n", total_patches ) ;
|
||||
fprintf( fd, "\tTotal number of elements: %ld\n", total_elements ) ;
|
||||
fprintf( fd, "\tTotal number of interactions: %ld\n", total_interactions);
|
||||
fprintf( fd, "\t completely visible: %ld\n",
|
||||
total_comp_visible_interactions ) ;
|
||||
fprintf( fd, "\t completely invisible: %ld\n",
|
||||
total_invisible_interactions ) ;
|
||||
fprintf( fd, "\t partially visible: %ld\n",
|
||||
total_interactions - total_comp_visible_interactions
|
||||
- total_invisible_interactions ) ;
|
||||
fprintf( fd, "\tInteraction coherence (root interaction not counted)\n");
|
||||
fprintf( fd, "\t Common for 4 siblings: %ld\n", total_match3 ) ;
|
||||
fprintf( fd, "\t Common for 3 siblings: %ld\n", total_match2 ) ;
|
||||
fprintf( fd, "\t Common for 2 siblings: %ld\n", total_match1 ) ;
|
||||
fprintf( fd, "\t Common for no sibling: %ld\n", total_match0 ) ;
|
||||
fprintf( fd, "\tAvg. elements per patch: %.1f\n",
|
||||
(float)total_elements / (float)total_patches ) ;
|
||||
fprintf( fd, "\tAvg. interactions per patch: %.1f\n",
|
||||
(float)total_interactions / (float)total_patches ) ;
|
||||
fprintf( fd, "\tAvg. interactions per element:%.1f\n",
|
||||
(float)total_interactions / (float)total_elements ) ;
|
||||
fprintf( fd, "\tNumber of elements in equivalent uniform mesh: %ld\n",
|
||||
total_equiv_elements ) ;
|
||||
fprintf( fd, "\tElem(hierarchical)/Elem(uniform): %.2f%%\n",
|
||||
(float)total_elements / (float)total_equiv_elements * 100.0 ) ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**********************************************************
|
||||
*
|
||||
* print_per_process_info()
|
||||
*
|
||||
***********************************************************/
|
||||
|
||||
void print_per_process_info(FILE *fd, long process)
|
||||
{
|
||||
long cache_line ;
|
||||
long iteration ;
|
||||
StatisticalInfo *ps ;
|
||||
Element *e ;
|
||||
|
||||
ps = &global->stat_info[process] ;
|
||||
|
||||
fprintf( fd, "\t\tModeling tasks: %ld\n",
|
||||
ps->total_modeling_tasks ) ;
|
||||
fprintf( fd, "\t\tDefine patch tasks: %ld\n",
|
||||
ps->total_def_patch_tasks ) ;
|
||||
fprintf( fd, "\t\tFF refinement tasks: %ld\n",
|
||||
ps->total_ff_ref_tasks ) ;
|
||||
fprintf( fd, "\t\tRay processing tasks: %ld\n",
|
||||
ps->total_ray_tasks ) ;
|
||||
fprintf( fd, "\t\tRadiosity Avg/Norm tasks: %ld\n",
|
||||
ps->total_radavg_tasks ) ;
|
||||
fprintf( fd, "\t\tInteraction computations: %ld\n",
|
||||
ps->total_interaction_comp ) ;
|
||||
fprintf( fd, "\t\tVisibility computations: %ld\n",
|
||||
ps->total_visibility_comp ) ;
|
||||
fprintf( fd, "\t\t (%ld of %ld were partially visible)\n",
|
||||
ps->partially_visible,
|
||||
ps->total_visibility_comp ) ;
|
||||
fprintf( fd, "\t\tRay intersection tests: %ld\n",
|
||||
ps->total_ray_intersect_test ) ;
|
||||
fprintf( fd, "\t\tPatch cache hit ratio: %.2f%%\n",
|
||||
ps->total_patch_cache_hit * 100 /
|
||||
(ps->total_patch_cache_check + 0.01) ) ;
|
||||
for( cache_line = 0 ; cache_line < PATCH_CACHE_SIZE ; cache_line++ )
|
||||
fprintf( fd, "\t\t (level %ld): %.2f%%\n",
|
||||
cache_line,
|
||||
ps->patch_cache_hit[cache_line] * 100 /
|
||||
(ps->total_patch_cache_check + 0.01));
|
||||
|
||||
/* Per iteration info */
|
||||
fprintf( fd, "\t\tPer iteration info.\n" ) ;
|
||||
for( iteration = 0 ; iteration < global->iteration_count ; iteration++ )
|
||||
{
|
||||
fprintf( fd, "\t\t [%ld] Interaction comp: %ld\n",
|
||||
iteration, ps->per_iteration[iteration].visibility_comp ) ;
|
||||
fprintf( fd, "\t\t Ray Intersection: %ld\n",
|
||||
ps->per_iteration[iteration].ray_intersect_test ) ;
|
||||
fprintf( fd, "\t\t Tasks from my Q: %ld\n",
|
||||
ps->per_iteration[iteration].tasks_from_myq ) ;
|
||||
fprintf( fd, "\t\t Tasks from other Q: %ld\n",
|
||||
ps->per_iteration[iteration].tasks_from_otherq ) ;
|
||||
fprintf( fd, "\t\t Process_task wait count: %ld\n",
|
||||
ps->per_iteration[iteration].process_tasks_wait ) ;
|
||||
e = ps->per_iteration[iteration].last_pr_task ;
|
||||
if( e == 0 )
|
||||
continue ;
|
||||
if( e->parent == 0 )
|
||||
{
|
||||
fprintf( fd, "\t\t Last task: Patch level\n" ) ;
|
||||
fprintf( fd, "\t\t (%ld root inter)\n",
|
||||
e->n_interactions ) ;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf( fd, "\t\t Last task: Elem level\n" ) ;
|
||||
fprintf( fd, "\t\t (%ld inter, %.3f Elem/Patch)\n",
|
||||
e->n_interactions, e->area / e->patch->area ) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**********************************************************
|
||||
*
|
||||
* get_patch_stat()
|
||||
*
|
||||
***********************************************************/
|
||||
|
||||
long n_elements_in_patch ;
|
||||
long n_equiv_elem_in_patch ;
|
||||
float min_elem_area ;
|
||||
long n_interactions_in_patch ;
|
||||
long n_comp_visible_interactions ;
|
||||
long n_invisible_interactions ;
|
||||
|
||||
|
||||
void get_patch_stat(Patch *patch, long dummy, long process_id)
|
||||
{
|
||||
/* Initialize stat info for element */
|
||||
n_elements_in_patch = 0 ;
|
||||
n_equiv_elem_in_patch = 1 ;
|
||||
min_elem_area = patch->area ;
|
||||
n_interactions_in_patch = 0 ;
|
||||
n_comp_visible_interactions = 0 ;
|
||||
n_invisible_interactions = 0 ;
|
||||
|
||||
/* Traverse the quad tree */
|
||||
foreach_element_in_patch( patch, get_elem_stat, 0, process_id ) ;
|
||||
|
||||
/* Update global stat variables */
|
||||
total_patches++ ;
|
||||
total_elements += n_elements_in_patch ;
|
||||
total_equiv_elements += n_equiv_elem_in_patch ;
|
||||
total_interactions += n_interactions_in_patch ;
|
||||
total_comp_visible_interactions += n_comp_visible_interactions ;
|
||||
total_invisible_interactions += n_invisible_interactions ;
|
||||
|
||||
#if PATCH_ASSIGNMENT == PATCH_ASSIGNMENT_COSTBASED
|
||||
if( n_interactions_in_patch
|
||||
!= global->patch_cost[patch->seq_no].n_total_inter )
|
||||
{
|
||||
printf( "Error: patch(%d) Inter counted: %d (n_total_inter %d)\n",
|
||||
patch->seq_no,
|
||||
n_interactions_in_patch,
|
||||
global->patch_cost[patch->seq_no].n_total_inter ) ;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * get_elem_stat()
 *
 * Per-element statistics callback; get_patch_stat() passes it to
 * foreach_element_in_patch().  For the given element it
 *   - bumps the per-patch element count and refines the "equivalent
 *     element" estimate,
 *   - classifies every interaction by its visibility value,
 *   - prints a message if the interaction bookkeeping looks inconsistent,
 *   - updates the interactions-per-element histogram,
 *   - for a non-leaf element, tallies how many interactions its four
 *     sub-elements (center, top, right, left) have in common.
 *
 *   elem        element to examine
 *   dummy       unused
 *   process_id  forwarded to count_interaction()
 */
void get_elem_stat(Element *elem, long dummy, long process_id)
{
    Interaction *inter ;
    long n_partial = 0 ;    /* visibility strictly between 0 and 1 */
    long n_full = 0 ;       /* visibility == 1.0 */
    long n_hidden = 0 ;     /* visibility == 0.0 */
    long shared0, shared1, shared2, shared3 ;

    n_elements_in_patch += 1 ;

    /* Each quartering of the smallest area seen so far multiplies the
       equivalent element count by four. */
    while( min_elem_area > elem->area )
    {
        min_elem_area *= 0.25 ;
        n_equiv_elem_in_patch *= 4 ;
    }

    /* Classify visibility */
    n_interactions_in_patch += elem->n_interactions ;
    inter = elem->interactions ;
    while( inter )
    {
        if( inter->visibility == 0.0 )
            n_hidden++ ;
        else if( inter->visibility == 1.0 )
            n_full++ ;
        else
            n_partial++ ;

        inter = inter->next ;
    }

    /* Consistency checks: report only, processing continues */
    if( n_hidden + n_full + n_partial != elem->n_interactions )
        printf( "Fatal: Interactions count miss match\n" ) ;
    if( elem->n_vis_undef_inter != 0 )
        printf( "Fatal: Visibility undef list count non zero(%ld)\n",
                elem->n_vis_undef_inter ) ;
    if( elem->vis_undef_inter != 0 )
        printf( "Fatal: Visibility undef list not empty\n" ) ;

    n_comp_visible_interactions += n_full ;
    n_invisible_interactions += n_hidden ;

    /* Count interactions / element; counts above the histogram range are
       pooled in many_interaction */
    if( elem->n_interactions <= MAX_INTERACTION_PER_ELEMENT )
    {
        elem_interaction[ elem->n_interactions ].count++ ;
        elem_interaction[ elem->n_interactions ].area += elem->area ;
    }
    else
    {
        many_interaction.count++ ;
        many_interaction.area += elem->area ;
    }

    /* Analyze object coherence: nothing to compare on a leaf */
    if( LEAF_ELEMENT( elem ) )
        return ;

    shared3 = 0 ;
    shared2 = 0 ;
    shared1 = 0 ;
    shared0 = 0 ;

    /* Each sub-element in turn is compared against the other three */
    count_interaction( elem->center, elem->top, elem->right, elem->left,
                       &shared3, &shared2, &shared1, &shared0, process_id ) ;
    count_interaction( elem->top, elem->right, elem->left, elem->center,
                       &shared3, &shared2, &shared1, &shared0, process_id ) ;
    count_interaction( elem->right, elem->left, elem->center, elem->top,
                       &shared3, &shared2, &shared1, &shared0, process_id ) ;
    count_interaction( elem->left, elem->center, elem->top, elem->right,
                       &shared3, &shared2, &shared1, &shared0, process_id ) ;

    total_match3 += shared3 ;
    total_match2 += shared2 ;
    total_match1 += shared1 ;
    total_match0 += shared0 ;
}
|
||||
|
||||
|
||||
|
||||
/*
 * count_interaction()
 *
 * For every interaction on es's list, count in how many of the lists of
 * e1, e2 and e3 an interaction with the same destination appears, and
 * increment the matching counter: *c0 for none, *c1 for one, *c2 for two,
 * *c3 for all three.
 *
 *   es          element whose interactions are examined
 *   e1, e2, e3  elements whose interaction lists are searched
 *   c3..c0      counters to update
 *   process_id  forwarded to search_intearction()
 */
void count_interaction(Element *es, Element *e1, Element *e2, Element *e3, long *c3, long *c2, long *c1, long *c0, long process_id)
{
    Interaction *cur = es->interactions ;
    long hits ;

    while( cur )
    {
        hits = search_intearction( e1->interactions, cur, process_id ) ;
        hits += search_intearction( e2->interactions, cur, process_id ) ;
        hits += search_intearction( e3->interactions, cur, process_id ) ;

        if( hits == 0 )
            (*c0)++ ;
        else if( hits == 1 )
            (*c1)++ ;
        else if( hits == 2 )
            (*c2)++ ;
        else if( hits == 3 )
            (*c3)++ ;

        cur = cur->next ;
    }
}
|
||||
|
||||
/*
 * search_intearction()
 *
 * Scan the linked list int_list for an entry whose destination equals
 * inter->destination.
 *
 *   int_list    head of the list to scan (may be null)
 *   inter       interaction whose destination is looked for
 *   process_id  unused
 *
 * Returns 1 if such an entry exists, 0 otherwise.
 */
long search_intearction(Interaction *int_list, Interaction *inter, long process_id)
{
    Interaction *cur ;

    for( cur = int_list ; cur != 0 ; cur = cur->next )
    {
        if( cur->destination == inter->destination )
            return( 1 ) ;
    }

    return( 0 ) ;
}
|
||||
|
||||
/***************************************
|
||||
*
|
||||
* Print running time
|
||||
*
|
||||
****************************************/
|
||||
|
||||
void print_running_time(long process_id)
|
||||
{
|
||||
long time_diff, time_diff1 ;
|
||||
|
||||
time_diff = time_rad_end - time_rad_start ;
|
||||
time_diff1 = time_rad_end - timing[0]->rad_start;
|
||||
if( time_diff < 0 )
|
||||
time_diff += CLOCK_MAX_VAL ;
|
||||
if( time_diff1 < 0 )
|
||||
time_diff1 += CLOCK_MAX_VAL ;
|
||||
|
||||
printf( "\tOverall start time\t%20lu\n", time_rad_start);
|
||||
printf( "\tOverall end time\t%20lu\n", time_rad_end);
|
||||
printf( "\tTotal time with initialization\t%20lu\n", time_diff);
|
||||
printf( "\tTotal time without initialization\t%20lu\n", time_diff1);
|
||||
}
|
||||
|
||||
|
||||
/***************************************
|
||||
*
|
||||
* Print process creation overhead
|
||||
*
|
||||
****************************************/
|
||||
|
||||
void print_fork_time(long process_id)
|
||||
{
|
||||
long pid ;
|
||||
|
||||
if( n_processors <= 1 )
|
||||
return ;
|
||||
|
||||
printf( "\tProcess fork overhead\n" ) ;
|
||||
for( pid = 0 ; pid < n_processors-1 ; pid++ )
|
||||
{
|
||||
printf( "\t Process %ld %.2f mS\n",
|
||||
pid,
|
||||
(timing[pid]->rad_start - time_rad_start) / 1000.0 ) ;
|
||||
}
|
||||
|
||||
printf( "\t (total) %.2f mS\n",
|
||||
(time_process_start[n_processors-2] - time_rad_start) / 1000.0 ) ;
|
||||
}
|
||||
|
||||
|
||||
/***************************************
|
||||
*
|
||||
* Initialize statistical info
|
||||
*
|
||||
****************************************/
|
||||
|
||||
void init_stat_info(long process_id)
|
||||
{
|
||||
long pid ;
|
||||
long i ;
|
||||
StatisticalInfo *ps ;
|
||||
|
||||
for( pid = 0 ; pid < MAX_PROCESSORS ; pid++ )
|
||||
{
|
||||
ps = &global->stat_info[ pid ] ;
|
||||
ps->total_modeling_tasks = 0 ;
|
||||
ps->total_def_patch_tasks = 0 ;
|
||||
ps->total_ff_ref_tasks = 0 ;
|
||||
ps->total_ray_tasks = 0 ;
|
||||
ps->total_radavg_tasks = 0 ;
|
||||
ps->total_interaction_comp = 0 ;
|
||||
ps->total_visibility_comp = 0 ;
|
||||
ps->partially_visible = 0 ;
|
||||
ps->total_ray_intersect_test= 0 ;
|
||||
ps->total_patch_cache_check = 0 ;
|
||||
ps->total_patch_cache_hit = 0 ;
|
||||
|
||||
for( i = 0 ; i < PATCH_CACHE_SIZE ; i++ )
|
||||
ps->patch_cache_hit[i] = 0 ;
|
||||
|
||||
for( i = 0 ; i < MAX_ITERATION_INFO ; i++ )
|
||||
{
|
||||
ps->per_iteration[ i ].visibility_comp = 0 ;
|
||||
ps->per_iteration[ i ].ray_intersect_test = 0 ;
|
||||
ps->per_iteration[ i ].tasks_from_myq = 0 ;
|
||||
ps->per_iteration[ i ].tasks_from_otherq = 0 ;
|
||||
ps->per_iteration[ i ].process_tasks_wait = 0 ;
|
||||
ps->per_iteration[ i ].last_pr_task = 0 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************
|
||||
*
|
||||
* clear_radiosity()
|
||||
*
|
||||
***********************************************************/
|
||||
|
||||
|
||||
void clear_radiosity(long process_id)
|
||||
{
|
||||
foreach_patch_in_bsp( clear_patch_radiosity, 0, 0 ) ;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Per-patch callback: run clear_element_radiosity() over the elements of
 * the patch via foreach_element_in_patch().
 *
 *   patch       patch whose elements are cleared
 *   dummy       unused
 *   process_id  forwarded to foreach_element_in_patch()
 */
void clear_patch_radiosity(Patch *patch, long dummy, long process_id)
{
    foreach_element_in_patch(
        patch,
        clear_element_radiosity,
        0,
        process_id ) ;
}
|
||||
|
||||
|
||||
/*
 * Per-element callback: zero the element's rad and rad_subtree colour
 * components, then copy global->total_energy into
 * global->prev_total_energy and zero global->total_energy.
 *
 *   elem        element to clear
 *   dummy       unused
 *   process_id  unused
 *
 * NOTE(review): the global energy copy/reset is done on every call.  If
 * the traversal invokes this callback more than once, later calls copy an
 * already zeroed total_energy into prev_total_energy -- confirm that this
 * is intended.
 */
static void clear_element_radiosity(Element *elem, long dummy, long process_id)
{
    /* Element's own radiosity */
    elem->rad.r = 0 ;
    elem->rad.g = 0 ;
    elem->rad.b = 0 ;

    /* Radiosity accumulated for the subtree */
    elem->rad_subtree.r = 0 ;
    elem->rad_subtree.g = 0 ;
    elem->rad_subtree.b = 0 ;

    /* Save the current total before wiping it */
    global->prev_total_energy = global->total_energy ;

    global->total_energy.r = 0 ;
    global->total_energy.g = 0 ;
    global->total_energy.b = 0 ;
}
|
479
splash2/codes/apps/radiosity/radiosity.H
Normal file
479
splash2/codes/apps/radiosity/radiosity.H
Normal file
|
@ -0,0 +1,479 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
|
||||
/* This file contains many constant definitions that control the execution
|
||||
of the program, as well as global data structure declarations */
|
||||
|
||||
#ifndef _RADIOSITY_H
|
||||
#define _RADIOSITY_H
|
||||
|
||||
#include <math.h>
|
||||
include(parallel.h)
|
||||
include(patch.h)
|
||||
include(model.h)
|
||||
include(task.h)
|
||||
#include "glib.h"
|
||||
#include "pslib.h"
|
||||
|
||||
|
||||
/****************************************
|
||||
*
|
||||
* Configuration Parameters
|
||||
*
|
||||
*****************************************/
|
||||
|
||||
/*************************************************************************
|
||||
*
|
||||
* Task scheduling & Load balancing (1)
|
||||
* --- Assignment of the patches to the processors
|
||||
*
|
||||
* This macro specifies how patches are assigned to the task queues (ie,
|
||||
* processors).
|
||||
* - PATCH_ASSIGNMENT_STATIC assigns the same set of patches to the same
|
||||
* queue repeatedly over iterations.
|
||||
* - PATCH_ASSIGNMENT_COSTBASED assigns patches to queues based on the
|
||||
* work associated with those patches in previous iterations, in order
|
||||
* to try to balance the initial workload assignment among processors
|
||||
* and hence reduce task stealing.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#define PATCH_ASSIGNMENT_STATIC (1)
|
||||
#define PATCH_ASSIGNMENT_COSTBASED (3)
|
||||
|
||||
#if !defined(PATCH_ASSIGNMENT)
|
||||
#define PATCH_ASSIGNMENT PATCH_ASSIGNMENT_STATIC
|
||||
#endif
|
||||
|
||||
|
||||
/****************************************
|
||||
*
|
||||
* Constants
|
||||
*
|
||||
*****************************************/
|
||||
|
||||
|
||||
#define F_ZERO (1.0e-6)
|
||||
|
||||
#if defined(SIMULATOR)
|
||||
#define MAX_PROCESSORS (128) /* Maximum number of processors
|
||||
(i.e., processes) created */
|
||||
#define MAX_TASKQUEUES (128) /* Maximum number of task queues */
|
||||
#define MAX_TASKS (32768) /* # of available task descriptors */
|
||||
#define MAX_PATCHES (1024) /* # of available patch objects */
|
||||
#define MAX_ELEMENTS (80000) /* # of available element objects */
|
||||
#define MAX_INTERACTIONS (640000) /* # of available interaction objs */
|
||||
#define MAX_ELEMVERTICES (65536) /* # of available ElemVertex objs */
|
||||
#define MAX_EDGES (65536) /* # of available Edge objs */
|
||||
#endif
|
||||
|
||||
#if defined(DASH)
|
||||
#define MAX_PROCESSORS (64) /* Maximum number of processors
|
||||
(i.e., processes) created */
|
||||
#define MAX_TASKQUEUES (64) /* Maximum number of task queues */
|
||||
#define MAX_TASKS (32768) /* # of available task descriptors */
|
||||
#define MAX_PATCHES (1024) /* # of available patch objects */
|
||||
#define MAX_ELEMENTS (80000) /* # of available element objects */
|
||||
#define MAX_INTERACTIONS (640000) /* # of available interaction objs */
|
||||
#define MAX_ELEMVERTICES (65536) /* # of available ElemVertex objs */
|
||||
#define MAX_EDGES (65536) /* # of available Edge objs */
|
||||
#endif
|
||||
|
||||
#if defined(SGI_GL)
|
||||
#define MAX_PROCESSORS (8) /* Maximum number of processors
|
||||
(i.e., processes) created */
|
||||
#define MAX_TASKQUEUES (8) /* Maximum number of task queues */
|
||||
#define MAX_TASKS (8192) /* # of available task descriptors */
|
||||
#define MAX_PATCHES (1024) /* # of available patch objects */
|
||||
#define MAX_ELEMENTS (40000) /* # of available element objects */
|
||||
#define MAX_INTERACTIONS (320000) /* # of available interaction objs */
|
||||
#define MAX_ELEMVERTICES (16384) /* # of available ElemVertex objs */
|
||||
#define MAX_EDGES (65536) /* # of available Edge objs */
|
||||
#endif
|
||||
|
||||
#if defined(SUN4)
|
||||
#define MAX_PROCESSORS (1) /* Maximum number of processors
|
||||
(i.e., processes) created */
|
||||
#define MAX_TASKQUEUES (1) /* Maximum number of task queues */
|
||||
#define MAX_TASKS (1024) /* # of available task descriptors */
|
||||
#define MAX_PATCHES (1024) /* # of available patch objects */
|
||||
#define MAX_ELEMENTS (20000) /* # of available element objects */
|
||||
#define MAX_INTERACTIONS (160000) /* # of available interaction objs */
|
||||
#define MAX_ELEMVERTICES (16384) /* # of available ElemVertex objs */
|
||||
#define MAX_EDGES (32768) /* # of available Edge objs */
|
||||
#endif
|
||||
|
||||
#if (!defined(SIMULATOR) && !defined(DASH) && !defined(SGI_GL) && !defined(SUN4))
|
||||
#define MAX_PROCESSORS (128) /* Maximum number of processors
|
||||
(i.e., processes) created */
|
||||
#define MAX_TASKQUEUES (128) /* Maximum number of task queues */
|
||||
#define MAX_TASKS (32768) /* # of available task descriptors */
|
||||
#define MAX_PATCHES (1024) /* # of available patch objects */
|
||||
#define MAX_ELEMENTS (80000) /* # of available element objects */
|
||||
#define MAX_INTERACTIONS (640000) /* # of available interaction objs */
|
||||
#define MAX_ELEMVERTICES (65536) /* # of available ElemVertex objs */
|
||||
#define MAX_EDGES (65536) /* # of available Edge objs */
|
||||
#endif
|
||||
|
||||
#define MAX_SHARED_LOCK (3900) /* Maximum locks allocated. Objects
|
||||
share these locks */
|
||||
|
||||
/* CLOCK_MAX_VAL is the value added to a negative clock difference to undo
   a wrap of the ANL macro clock (see print_running_time()). */
#if defined(SGI_GL) || defined(DASH) || defined(SIMULATOR)
#define CLOCK_MAX_VAL (2048*1000000) /* ANL macro clock max value */
#elif defined(SUN4)
/* 65536*1000000 does not fit in a 32-bit int, so the product is formed in
   long.  NOTE(review): where long is also 32 bits this still overflows;
   confirm the intended range on such targets. */
#define CLOCK_MAX_VAL (65536L*1000000L) /* ANL macro clock max value */
#else
#define CLOCK_MAX_VAL (2048*1000000) /* ANL macro clock max value */
#endif
|
||||
|
||||
|
||||
|
||||
/****************************************
|
||||
*
|
||||
* System defaults
|
||||
*
|
||||
*****************************************/
|
||||
|
||||
#define DEFAULT_N_PROCESSORS (1)
|
||||
#define DEFAULT_N_TASKQUEUES (1)
|
||||
#define DEFAULT_N_TASKS_PER_QUEUE (200)
|
||||
/* Create new tasks if number of tasks currently
|
||||
in the queue is less than this number */
|
||||
#define DEFAULT_N_INTER_PARALLEL_BF_REFINEMENT (5)
|
||||
/* If the number of interactions is greater than
|
||||
or equal to this value, BF-refinement is
|
||||
performed in parallel */
|
||||
#define DEFAULT_N_VISIBILITY_PER_TASK (4)
|
||||
/* Number of visibility computations per
|
||||
visibility task */
|
||||
#define DEFAULT_AREA_EPSILON (2000.0)
|
||||
/* If element is smaller than this value,
|
||||
no further subdivision takes place */
|
||||
#define DEFAULT_ENERGY_EPSILON (0.005)
|
||||
/* Terminate radiosity iteration if the
|
||||
difference of total energy is less than this
|
||||
value. */
|
||||
#define DEFAULT_BFEPSILON (0.015)
|
||||
/* BF refinement threshold level. If the estimated
|
||||
error of BF (due to FF error and error due to
|
||||
constant approximation within an element) is
|
||||
larger than this value, then subdivide */
|
||||
|
||||
#define DFLT_VIEW_ROT_X (10.0)
|
||||
#define DFLT_VIEW_ROT_Y (0.0)
|
||||
#define DFLT_VIEW_DIST (8000.0)
|
||||
#define DFLT_VIEW_ZOOM (1.0)
|
||||
|
||||
|
||||
/****************************************
|
||||
*
|
||||
* Display mode
|
||||
*
|
||||
*****************************************/
|
||||
|
||||
#define DISPLAY_FILLED (0)
|
||||
#define DISPLAY_SHADED (1)
|
||||
#define DISPLAY_EDGEONLY (2)
|
||||
|
||||
#define DISPLAY_ALL_INTERACTIONS (0)
|
||||
#define DISPLAY_HALF_INTERACTIONS (1)
|
||||
|
||||
|
||||
|
||||
/****************************************
|
||||
*
|
||||
* Statistical Measure
|
||||
*
|
||||
*****************************************/
|
||||
|
||||
#define MAX_ITERATION_INFO (16)
|
||||
|
||||
struct _element ;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
long visibility_comp ;
|
||||
long ray_intersect_test ;
|
||||
long tasks_from_myq ;
|
||||
long tasks_from_otherq ;
|
||||
long process_tasks_wait ;
|
||||
struct _element *last_pr_task ;
|
||||
} PerIterationInfo ;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char pad1[PAGE_SIZE]; /* padding to avoid false-sharing
|
||||
and allow page-placement */
|
||||
long total_modeling_tasks ;
|
||||
long total_def_patch_tasks ;
|
||||
long total_ff_ref_tasks ;
|
||||
long total_ray_tasks ;
|
||||
long total_radavg_tasks ;
|
||||
long total_direct_radavg_tasks ;
|
||||
long total_interaction_comp ;
|
||||
long total_visibility_comp ;
|
||||
long partially_visible ;
|
||||
long total_ray_intersect_test ;
|
||||
long total_patch_cache_check ;
|
||||
long total_patch_cache_hit ;
|
||||
long patch_cache_hit[PATCH_CACHE_SIZE] ;
|
||||
PerIterationInfo per_iteration[ MAX_ITERATION_INFO ] ;
|
||||
char pad2[PAGE_SIZE]; /* padding to avoid false-sharing
|
||||
and allow page-placement */
|
||||
} StatisticalInfo ;
|
||||
|
||||
/****************************************
|
||||
*
|
||||
* Shared data structure definition.
|
||||
*
|
||||
*****************************************/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
long rad_start, rad_time, refine_time, wait_time, vertex_time;
|
||||
} Timing;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
||||
/* Task queue */
|
||||
/* ***** */ long index;
|
||||
/* ***** */ LOCKDEC(index_lock)
|
||||
Task_Queue task_queue[ MAX_TASKQUEUES ] ;
|
||||
Task task_buf[ MAX_TASKS ] ;
|
||||
|
||||
/* BSP tree root */
|
||||
LOCKDEC(bsp_tree_lock)
|
||||
Patch *bsp_root ;
|
||||
|
||||
/* Average radiosity value */
|
||||
LOCKDEC(avg_radiosity_lock)
|
||||
long converged ;
|
||||
Rgb prev_total_energy ;
|
||||
Rgb total_energy ;
|
||||
float total_patch_area ;
|
||||
long iteration_count ;
|
||||
|
||||
/* Computation cost estimate */
|
||||
LOCKDEC(cost_sum_lock)
|
||||
long cost_sum ;
|
||||
long cost_estimate_sum ;
|
||||
Patch_Cost patch_cost[ MAX_PATCHES ] ;
|
||||
|
||||
/* Barrier */
|
||||
BARDEC(barrier)
|
||||
|
||||
/* Private barrier */
|
||||
long pbar_count ;
|
||||
LOCKDEC(pbar_lock)
|
||||
|
||||
/* Task initializer counter */
|
||||
long task_counter ;
|
||||
LOCKDEC(task_counter_lock)
|
||||
|
||||
/* Resource buffers */
|
||||
LOCKDEC(free_patch_lock)
|
||||
Patch *free_patch ;
|
||||
long n_total_patches ;
|
||||
long n_free_patches ;
|
||||
Patch patch_buf[ MAX_PATCHES ] ;
|
||||
|
||||
LOCKDEC(free_element_lock)
|
||||
Element *free_element ;
|
||||
long n_free_elements ;
|
||||
Element element_buf[ MAX_ELEMENTS ] ;
|
||||
|
||||
LOCKDEC(free_interaction_lock)
|
||||
Interaction *free_interaction ;
|
||||
long n_free_interactions ;
|
||||
Interaction interaction_buf[ MAX_INTERACTIONS ] ;
|
||||
|
||||
LOCKDEC(free_elemvertex_lock)
|
||||
long free_elemvertex ;
|
||||
ElemVertex elemvertex_buf[ MAX_ELEMVERTICES ] ;
|
||||
|
||||
LOCKDEC(free_edge_lock)
|
||||
long free_edge ;
|
||||
Edge edge_buf[ MAX_EDGES ] ;
|
||||
|
||||
Shared_Lock sh_lock[ MAX_SHARED_LOCK ] ;
|
||||
|
||||
StatisticalInfo stat_info[ MAX_PROCESSORS ] ;
|
||||
|
||||
} Global ;
|
||||
|
||||
|
||||
/****************************************
|
||||
*
|
||||
* Global variables
|
||||
*
|
||||
*****************************************/
|
||||
|
||||
extern Timing **timing ;
|
||||
extern Global *global ;
|
||||
extern long n_processors ;
|
||||
extern long n_taskqueues ;
|
||||
extern long n_tasks_per_queue ;
|
||||
|
||||
extern long N_inter_parallel_bf_refine ;
|
||||
extern long N_visibility_per_task ;
|
||||
extern float Area_epsilon ;
|
||||
extern float Energy_epsilon ;
|
||||
extern float BFepsilon ;
|
||||
|
||||
extern long batch_mode, verbose_mode ;
|
||||
extern long taskqueue_id[] ;
|
||||
|
||||
extern long time_rad_start, time_rad_end, time_process_start[] ;
|
||||
|
||||
|
||||
/****************************************
|
||||
*
|
||||
* Global function names & types
|
||||
*
|
||||
*****************************************/
|
||||
|
||||
/*
|
||||
* display.C
|
||||
*/
|
||||
void radiosity_averaging(Element *elem, long mode, long process_id);
|
||||
void setup_view(float rot_x, float rot_y, float dist, float zoom, long process_id);
|
||||
void display_scene(long fill_sw, long patch_sw, long mesh_sw, long interaction_sw, long process_id);
|
||||
void display_patch(Patch *patch, long mode, long process_id);
|
||||
void display_patches_in_bsp_tree(long mode, long process_id);
|
||||
void display_element(Element *element, long mode, long process_id);
|
||||
void display_elements_in_patch(Patch *patch, long mode, long process_id);
|
||||
void display_elements_in_bsp_tree(long mode, long process_id);
|
||||
void display_interactions_in_element(Element *elem, long mode, long process_id);
|
||||
void display_interactions_in_patch(Patch *patch, long mode, long process_id);
|
||||
void display_interactions_in_bsp_tree(long process_id);
|
||||
void ps_display_scene(long fill_sw, long patch_sw, long mesh_sw, long interaction_sw, long process_id);
|
||||
void ps_display_patch(Patch *patch, long mode, long process_id);
|
||||
void ps_display_patches_in_bsp_tree(long mode, long process_id);
|
||||
void ps_display_element(Element *element, long mode, long process_id);
|
||||
void ps_display_elements_in_patch(Patch *patch, long mode, long process_id);
|
||||
void ps_display_elements_in_bsp_tree(long mode, long process_id);
|
||||
void ps_display_interactions_in_element(Element *elem, long mode, long process_id);
|
||||
void ps_display_interactions_in_patch(Patch *patch, long mode, long process_id);
|
||||
void ps_display_interactions_in_bsp_tree(long process_id);
|
||||
|
||||
/*
|
||||
* elemman.C
|
||||
*/
|
||||
void foreach_element_in_patch(Patch *patch, void (*func)(), long arg1, long process_id);
|
||||
void foreach_leaf_element_in_patch(Patch *patch, void (*func)(), long arg1, long process_id);
|
||||
void ff_refine_elements(Element *e1, Element *e2, long level, long process_id);
|
||||
long error_analysis(Element *e1, Element *e2, Interaction *inter12, Interaction *inter21, long process_id);
|
||||
void bf_error_analysis_list(Element *elem, Interaction *i_list, long process_id);
|
||||
long bf_error_analysis(Element *elem, Interaction *inter, long process_id);
|
||||
long radiosity_converged(long process_id);
|
||||
void subdivide_element(Element *e, long process_id);
|
||||
void process_rays(Element *e, long process_id);
|
||||
long element_completely_invisible(Element *e1, Element *e2, long process_id);
|
||||
Element *get_element(long process_id);
|
||||
long leaf_element(Element *elem, long process_id);
|
||||
void init_elemlist(long process_id);
|
||||
void print_element(Element *elem, long process_id);
|
||||
void foreach_interaction_in_element(Element *elem, void (*func)(), long arg1, long process_id);
|
||||
void compute_formfactor(Element *e_src, Element *e_dst, Interaction *inter, long process_id);
|
||||
void compute_interaction(Element *e_src, Element *e_dst, Interaction *inter, long subdiv, long process_id);
|
||||
void insert_interaction(Element *elem, Interaction *inter, long process_id);
|
||||
void delete_interaction(Element *elem, Interaction *prev, Interaction *inter, long process_id);
|
||||
void insert_vis_undef_interaction(Element *elem, Interaction *inter, long process_id);
|
||||
void delete_vis_undef_interaction(Element *elem, Interaction *prev, Interaction *inter, long process_id);
|
||||
Interaction *get_interaction(long process_id);
|
||||
void free_interaction(Interaction *interaction, long process_id);
|
||||
void init_interactionlist(long process_id);
|
||||
void print_interaction(Interaction *inter, long process_id);
|
||||
|
||||
/*
|
||||
* rad_main.C
|
||||
*/
|
||||
void start_radiosity(long val);
|
||||
void change_display(long val);
|
||||
void change_view_x(long val);
|
||||
void change_view_y(long val);
|
||||
void change_view_zoom(long val);
|
||||
void change_BFepsilon(long val);
|
||||
void change_area_epsilon(long val);
|
||||
void select_model(long val);
|
||||
void utility_tools(long val);
|
||||
void radiosity(void);
|
||||
long init_ray_tasks(long process_id);
|
||||
void init_radavg_tasks(long mode, long process_id);
|
||||
void init_global(long process_id);
|
||||
void print_usage(void);
|
||||
|
||||
/*
|
||||
* rad_tools.C
|
||||
*/
|
||||
void print_statistics(FILE *fd, long process_id);
|
||||
void print_per_process_info(FILE *fd, long process);
|
||||
void get_patch_stat(Patch *patch, long dummy, long process_id);
|
||||
void get_elem_stat(Element *elem, long dummy, long process_id);
|
||||
void count_interaction(Element *es, Element *e1, Element *e2, Element *e3, long *c3, long *c2, long *c1, long *c0, long process_id);
|
||||
long search_intearction(Interaction *int_list, Interaction *inter, long process_id);
|
||||
void print_running_time(long process_id);
|
||||
void print_fork_time(long process_id);
|
||||
void init_stat_info(long process_id);
|
||||
void clear_radiosity(long process_id);
|
||||
void clear_patch_radiosity(Patch *patch, long dummy, long process_id);
|
||||
|
||||
/*
|
||||
* smallobj.C
|
||||
*/
|
||||
float vector_length(Vertex *v);
|
||||
float distance(Vertex *p1, Vertex *p2);
|
||||
float normalize_vector(Vertex *v1, Vertex *v2);
|
||||
float inner_product(Vertex *v1, Vertex *v2);
|
||||
void cross_product(Vertex *vc, Vertex *v1, Vertex *v2);
|
||||
float plane_normal(Vertex *vc, Vertex *p1, Vertex *p2, Vertex *p3);
|
||||
void center_point(Vertex *p1, Vertex *p2, Vertex *p3, Vertex *pc);
|
||||
void four_center_points(Vertex *p1, Vertex *p2, Vertex *p3, Vertex *pc, Vertex *pc1, Vertex *pc2, Vertex *pc3);
|
||||
void print_point(Vertex *point);
|
||||
void print_rgb(Rgb *rgb);
|
||||
ElemVertex *create_elemvertex(Vertex *p, long process_id);
|
||||
ElemVertex *get_elemvertex(long process_id);
|
||||
void init_elemvertex(long process_id);
|
||||
void foreach_leaf_edge(Edge *edge, long reverse, void (*func)(), long arg1, long arg2, long process_id);
|
||||
Edge *create_edge(ElemVertex *v1, ElemVertex *v2, long process_id);
|
||||
void subdivide_edge(Edge *e, float a_ratio, long process_id);
|
||||
Edge *get_edge(long process_id);
|
||||
void init_edge(long process_id);
|
||||
void init_sharedlock(long process_id);
|
||||
Shared_Lock *get_sharedlock(long segment, long process_id);
|
||||
|
||||
/*
|
||||
* visible.C
|
||||
*/
|
||||
void init_visibility_module(long process_id);
|
||||
void get_test_rays(Vertex *p_src, Ray *v, long no, long process_id);
|
||||
long v_intersect(Patch *patch, Vertex *p, Ray *ray, float t);
|
||||
long traverse_bsp(Patch *src_node, Vertex *p, Ray *ray, float r_min, float r_max, long process_id);
|
||||
long traverse_subtree(Patch *node, Vertex *p, Ray *ray, float r_min, float r_max, long process_id);
|
||||
long intersection_type(Patch *patch, Vertex *p, Ray *ray, float *tval, float range_min, float range_max);
|
||||
long test_intersection(Patch *patch, Vertex *p, Ray *ray, float tval, long process_id);
|
||||
void update_patch_cache(Patch *patch, long process_id);
|
||||
long check_patch_cache(Vertex *p, Ray *ray, float r_min, float r_max, long process_id);
|
||||
void init_patch_cache(long process_id);
|
||||
long patch_tested(Patch *p, long process_id);
|
||||
float visibility(Element *e1, Element *e2, long n_rays, long process_id);
|
||||
void compute_visibility_values(Element *elem, Interaction *inter, long n_inter, long process_id);
|
||||
void visibility_task(Element *elem, Interaction *inter, long n_inter, void (*k)(), long process_id);
|
||||
|
||||
#endif
|
3608
splash2/codes/apps/radiosity/room_model.C
Normal file
3608
splash2/codes/apps/radiosity/room_model.C
Normal file
File diff suppressed because it is too large
Load diff
635
splash2/codes/apps/radiosity/smallobj.C
Normal file
635
splash2/codes/apps/radiosity/smallobj.C
Normal file
|
@ -0,0 +1,635 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Class methods for small simple objects.
|
||||
*
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
EXTERN_ENV;
|
||||
|
||||
include(radiosity.h)
|
||||
|
||||
struct {
|
||||
char pad1[PAGE_SIZE]; /* padding to avoid false-sharing
|
||||
and allow page-placement */
|
||||
long n_local_free_elemvertex ;
|
||||
ElemVertex *local_free_elemvertex ;
|
||||
long n_local_free_edge ;
|
||||
Edge *local_free_edge ;
|
||||
long lock_alloc_counter ;
|
||||
char pad2[PAGE_SIZE]; /* padding to avoid false-sharing
|
||||
and allow page-placement */
|
||||
} sobj_struct[MAX_PROCESSORS];
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
****************************************************************************
|
||||
*
|
||||
* Methods for Vertex object
|
||||
*
|
||||
****************************************************************************
|
||||
****************************************************************************/
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* vector_length()
|
||||
*
|
||||
* Compute the length of a vector represented by a Vertex
|
||||
* length = | v |
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Return |v|: the square root of the sum of the squared x, y and z
 * components.  The squares are accumulated in a double, one component at
 * a time, before sqrt() is applied.
 */
float vector_length(Vertex *v)
{
    double sum_sq ;

    sum_sq = v->x * v->x ;
    sum_sq += v->y * v->y ;
    sum_sq += v->z * v->z ;

    return( sqrt( sum_sq ) ) ;
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* distance()
|
||||
*
|
||||
* Compute the distance between two points.
|
||||
* dist = | P1 - P2 |
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Return |P2 - P1|: build the difference vector from p1 to p2 and hand it
 * to vector_length().
 */
float distance(Vertex *p1, Vertex *p2)
{
    Vertex delta ;
    float len ;

    delta.x = p2->x - p1->x ;
    delta.y = p2->y - p1->y ;
    delta.z = p2->z - p1->z ;

    len = vector_length( &delta ) ;
    return( len ) ;
}
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* normalize_vector()
|
||||
*
|
||||
* Normalize vector represented by Vertex
|
||||
* v1 <- normalized( v2 )
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Store in *v1 the vector *v2 scaled by 1/|v2| and return |v2|.
 * Components are processed one at a time, so v1 and v2 may be the same
 * object (plane_normal() calls it that way).  The length is not checked
 * against zero before the division.
 */
float normalize_vector(Vertex *v1, Vertex *v2)
{
    float len ;
    float inv_len ;

    len     = vector_length( v2 ) ;
    inv_len = (float)1.0 / len ;

    v1->x = v2->x * inv_len ;
    v1->y = v2->y * inv_len ;
    v1->z = v2->z * inv_len ;

    return( len ) ;
}
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
* inner_product()
|
||||
*
|
||||
* (v1.v2) <- inner_product( v1, v2 )
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
/*
 * Return the dot product of *v1 and *v2, accumulated in single
 * precision in x, y, z order.
 */
float inner_product(Vertex *v1, Vertex *v2)
{
    float dot ;

    dot  = v1->x * v2->x ;
    dot += v1->y * v2->y ;
    dot += v1->z * v2->z ;

    return( dot ) ;
}
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
* cross_product()
|
||||
*
|
||||
* Vc = V1 X V2
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
/*
 * Store the cross product (*v1 x *v2) in *vc.
 * The result is written component by component, and vc->x is written
 * before v1->x and v2->x are read for the y and z components, so passing
 * vc equal to v1 or v2 does not give the mathematical cross product.
 */
void cross_product(Vertex *vc, Vertex *v1, Vertex *v2)
{
    vc->x = v1->y * v2->z - v1->z * v2->y ;   /* yz - zy */
    vc->y = v1->z * v2->x - v1->x * v2->z ;   /* zx - xz */
    vc->z = v1->x * v2->y - v1->y * v2->x ;   /* xy - yx */
}
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
* plane_normal()
|
||||
*
|
||||
* Vc = (P2-P1) X (P3-P1) / |(P2-P1) X (P3-P1)|
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
/*
 * Compute the unit normal of the plane through *p1, *p2 and *p3:
 * *vc receives (p2 - p1) x (p3 - p1) scaled to unit length.
 * Returns the length of the cross product before scaling.
 */
float plane_normal(Vertex *vc, Vertex *p1, Vertex *p2, Vertex *p3)
{
    Vertex edge12, edge13 ;

    /* Edge vectors leaving p1 */
    edge12.x = p2->x - p1->x ;
    edge12.y = p2->y - p1->y ;
    edge12.z = p2->z - p1->z ;

    edge13.x = p3->x - p1->x ;
    edge13.y = p3->y - p1->y ;
    edge13.z = p3->z - p1->z ;

    /* Cross product, then normalize it in place */
    cross_product( vc, &edge12, &edge13 ) ;
    return( normalize_vector( vc, vc ) ) ;
}
|
||||
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
* center_point()
|
||||
*
|
||||
* P = (P1 + P2 + P3) / 3
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
/*
 * Store in *pc the centroid of the triangle (*p1, *p2, *p3).
 * The coordinate sum is multiplied by a single-precision 1/3.
 */
void center_point(Vertex *p1, Vertex *p2, Vertex *p3, Vertex *pc)
{
    float one_third = (float)(1.0/3.0) ;

    pc->x = (p1->x + p2->x + p3->x) * one_third ;
    pc->y = (p1->y + p2->y + p3->y) * one_third ;
    pc->z = (p1->z + p2->z + p3->z) * one_third ;
}
|
||||
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
* four_center_points()
|
||||
*
|
||||
* P = (P1 + P2 + P3) / 3
* PCi = (4*Pi + Pj + Pk) / 6, i = 1..3 (Pj, Pk are the other two corners)
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
/*
 * Compute four interior points of the triangle (*p1, *p2, *p3):
 *   *pc  - the centroid, (p1 + p2 + p3) / 3
 *   *pc1 - (4*p1 + p2 + p3) / 6
 *   *pc2 - (p1 + 4*p2 + p3) / 6
 *   *pc3 - (p1 + p2 + 4*p3) / 6
 * Outputs are written in the order pc, pc1, pc2, pc3.
 */
void four_center_points(Vertex *p1, Vertex *p2, Vertex *p3, Vertex *pc, Vertex *pc1, Vertex *pc2, Vertex *pc3)
{
    float one_third = (float)(1.0/3.0) ;
    float one_sixth = (float)(1.0/6.0) ;

    /* Centroid of the element */
    pc->x = (p1->x + p2->x + p3->x) * one_third ;
    pc->y = (p1->y + p2->y + p3->y) * one_third ;
    pc->z = (p1->z + p2->z + p3->z) * one_third ;

    /* Point weighted towards p1 */
    pc1->x = (p1->x * 4 + p2->x + p3->x) * one_sixth ;
    pc1->y = (p1->y * 4 + p2->y + p3->y) * one_sixth ;
    pc1->z = (p1->z * 4 + p2->z + p3->z) * one_sixth ;

    /* Point weighted towards p2 */
    pc2->x = (p1->x + p2->x * 4 + p3->x) * one_sixth ;
    pc2->y = (p1->y + p2->y * 4 + p3->y) * one_sixth ;
    pc2->z = (p1->z + p2->z * 4 + p3->z) * one_sixth ;

    /* Point weighted towards p3 */
    pc3->x = (p1->x + p2->x + p3->x * 4) * one_sixth ;
    pc3->y = (p1->y + p2->y + p3->y * 4) * one_sixth ;
    pc3->z = (p1->z + p2->z + p3->z * 4) * one_sixth ;
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* print_point()
|
||||
*
|
||||
* Print point information.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Write the coordinates of *point to stdout as one tab-indented line,
 * each value with two decimals.
 */
void print_point(Vertex *point)
{
    double px = point->x ;
    double py = point->y ;
    double pz = point->z ;

    printf( "\tP(%.2f, %.2f, %.2f)\n", px, py, pz ) ;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
****************************************************************************
|
||||
*
|
||||
* Methods for Rgb object
|
||||
*
|
||||
****************************************************************************
|
||||
****************************************************************************
|
||||
****************************************************************************
|
||||
*
|
||||
* print_rgb()
|
||||
*
|
||||
* Print RGB information.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Write the three channels of *rgb to stdout as one tab-indented line,
 * each value with two decimals.
 */
void print_rgb(Rgb *rgb)
{
    double red   = rgb->r ;
    double green = rgb->g ;
    double blue  = rgb->b ;

    printf( "\tRGB(%.2f, %.2f, %.2f)\n", red, green, blue ) ;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
****************************************************************************
|
||||
*
|
||||
* Methods for ElementVertex
|
||||
*
|
||||
****************************************************************************
|
||||
****************************************************************************/
|
||||
/***************************************************************************
|
||||
*
|
||||
* create_elemvertex
|
||||
*
|
||||
* Given Vertex, create and return a new ElemVertex object.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Obtain an ElemVertex via get_elemvertex(process_id), copy *p into its
 * position field and return it.
 */
ElemVertex *create_elemvertex(Vertex *p, long process_id)
{
    ElemVertex *ev = get_elemvertex(process_id) ;

    ev->p = *p ;
    return( ev ) ;
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* get_elemvertex
|
||||
*
|
||||
* Returns an ElementVertex object
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
|
||||
ElemVertex *get_elemvertex(long process_id)
|
||||
{
|
||||
ElemVertex *ev ;
|
||||
|
||||
if( sobj_struct[process_id].n_local_free_elemvertex == 0 )
|
||||
{
|
||||
LOCK(global->free_elemvertex_lock);
|
||||
if ( MAX_ELEMVERTICES - global->free_elemvertex
|
||||
< N_ELEMVERTEX_ALLOCATE )
|
||||
{
|
||||
fprintf( stderr, "Fatal:Ran out of ElemVertex buffer\n" ) ;
|
||||
UNLOCK(global->free_elemvertex_lock);
|
||||
exit(1) ;
|
||||
}
|
||||
sobj_struct[process_id].n_local_free_elemvertex = N_ELEMVERTEX_ALLOCATE ;
|
||||
sobj_struct[process_id].local_free_elemvertex
|
||||
= &global->elemvertex_buf[ global->free_elemvertex ] ;
|
||||
global->free_elemvertex += N_ELEMVERTEX_ALLOCATE ;
|
||||
UNLOCK(global->free_elemvertex_lock);
|
||||
}
|
||||
|
||||
ev = sobj_struct[process_id].local_free_elemvertex++ ;
|
||||
sobj_struct[process_id].n_local_free_elemvertex-- ;
|
||||
|
||||
|
||||
/* Initialize contents */
|
||||
ev->col.r = 0.0 ;
|
||||
ev->col.g = 0.0 ;
|
||||
ev->col.b = 0.0 ;
|
||||
ev->weight = 0.0 ;
|
||||
|
||||
return( ev ) ;
|
||||
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* init_elemvertex()
|
||||
*
|
||||
* Initialize ElemVertex buffer.
|
||||
* This routine must be called in single process state.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
void init_elemvertex(long process_id)
|
||||
{
|
||||
long ev_cnt ;
|
||||
|
||||
/* Initialize global free list */
|
||||
LOCKINIT(global->free_elemvertex_lock);
|
||||
global->free_elemvertex = 0 ;
|
||||
|
||||
/* Allocate locks */
|
||||
for( ev_cnt = 0 ; ev_cnt < MAX_ELEMVERTICES ; ev_cnt++ )
|
||||
global->elemvertex_buf[ ev_cnt ].ev_lock
|
||||
= get_sharedlock( SHARED_LOCK_SEGANY, process_id ) ;
|
||||
|
||||
/* Initialize local free list */
|
||||
sobj_struct[process_id].n_local_free_elemvertex = 0 ;
|
||||
sobj_struct[process_id].local_free_elemvertex = 0 ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
****************************************************************************
|
||||
*
|
||||
* Methods for Edge
|
||||
*
|
||||
****************************************************************************
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* foreach_leaf_edge()
|
||||
*
|
||||
* For each leaf edges of the binary edge tree, apply the specified
|
||||
* function. Edges are traversed from A to B (i.e., from Pa of the root
|
||||
* to the Pb of the root) if 'reverse' is 0. Otherwise, it is traversed
|
||||
* from B to A.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Apply func to every leaf of the binary edge tree rooted at edge.
 * A leaf is an edge whose ea and eb children are both null; func is
 * called as func(leaf, reverse, arg1, arg2, process_id).  Children are
 * visited ea first when reverse is 0 and eb first otherwise.  A null
 * edge is ignored.
 */
void foreach_leaf_edge(Edge *edge, long reverse, void (*func)(), long arg1, long arg2, long process_id)
{
    Edge *near_child, *far_child ;

    if( edge == 0 )
        return ;

    /* Leaf: apply the callback and stop descending */
    if( (edge->ea == 0) && (edge->eb == 0) )
    {
        func( edge, reverse, arg1, arg2, process_id ) ;
        return ;
    }

    /* Interior node: pick the visiting order, then recurse */
    near_child = reverse ? edge->eb : edge->ea ;
    far_child  = reverse ? edge->ea : edge->eb ;

    if( near_child )
        foreach_leaf_edge( near_child, reverse, func, arg1, arg2, process_id ) ;
    if( far_child )
        foreach_leaf_edge( far_child, reverse, func, arg1, arg2, process_id ) ;
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* create_edge()
|
||||
*
|
||||
* Given two ElemVertices V1 and V2, create a new edge (V1,V2)
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Obtain an Edge via get_edge(process_id) and make it run from v1 (pa)
 * to v2 (pb).  The child pointers keep the null values set by get_edge().
 */
Edge *create_edge(ElemVertex *v1, ElemVertex *v2, long process_id)
{
    Edge *edge = get_edge(process_id) ;

    edge->pa = v1 ;
    edge->pb = v2 ;

    return( edge ) ;
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* subdivide_edge()
|
||||
*
|
||||
* Create child edges. If they already exist, do nothing.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * Split edge e into two child edges at the point
 *     a_ratio * Pa + (1 - a_ratio) * Pb.
 * The whole operation runs while holding the lock of e.  If e is no longer
 * a leaf (per _LEAF_EDGE) nothing is changed.  The child eb is stored
 * before ea, as in the original code.
 */
void subdivide_edge(Edge *e, float a_ratio, long process_id)
{
    Edge *edge_am, *edge_mb ;
    ElemVertex *mid ;
    float b_ratio ;

    /* Take the lock before looking at the children */
    LOCK(e->edge_lock->lock);

    /* Somebody else already subdivided this edge */
    if( ! _LEAF_EDGE(e) )
    {
        UNLOCK(e->edge_lock->lock);
        return ;
    }

    /* Subdivision point */
    b_ratio = (float)1.0 - a_ratio ;
    mid = get_elemvertex(process_id) ;
    mid->p.x = a_ratio * e->pa->p.x + b_ratio * e->pb->p.x ;
    mid->p.y = a_ratio * e->pa->p.y + b_ratio * e->pb->p.y ;
    mid->p.z = a_ratio * e->pa->p.z + b_ratio * e->pb->p.z ;

    /* (1) Child edge A-middle */
    edge_am = get_edge(process_id) ;
    edge_am->pa = e->pa ;
    edge_am->pb = mid ;

    /* (2) Child edge middle-B */
    edge_mb = get_edge(process_id) ;
    edge_mb->pa = mid ;
    edge_mb->pb = e->pb ;

    /* Publish the children: eb first, ea last */
    e->eb = edge_mb ;
    e->ea = edge_am ;

    UNLOCK(e->edge_lock->lock);
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* get_edge
|
||||
*
|
||||
* Returns an Edge object
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
|
||||
Edge *get_edge(long process_id)
|
||||
{
|
||||
Edge *edge ;
|
||||
|
||||
if( sobj_struct[process_id].n_local_free_edge == 0 )
|
||||
{
|
||||
LOCK(global->free_edge_lock);
|
||||
if ( MAX_EDGES - global->free_edge < N_EDGE_ALLOCATE )
|
||||
{
|
||||
fprintf( stderr, "Fatal:Ran out of Edge buffer\n" ) ;
|
||||
UNLOCK(global->free_edge_lock);
|
||||
exit(1) ;
|
||||
}
|
||||
sobj_struct[process_id].n_local_free_edge = N_EDGE_ALLOCATE ;
|
||||
sobj_struct[process_id].local_free_edge
|
||||
= &global->edge_buf[ global->free_edge ] ;
|
||||
global->free_edge += N_EDGE_ALLOCATE ;
|
||||
UNLOCK(global->free_edge_lock);
|
||||
}
|
||||
|
||||
edge = sobj_struct[process_id].local_free_edge++ ;
|
||||
sobj_struct[process_id].n_local_free_edge-- ;
|
||||
|
||||
|
||||
/* Initialize contents */
|
||||
edge->pa = 0 ;
|
||||
edge->pb = 0 ;
|
||||
edge->ea = 0 ;
|
||||
edge->eb = 0 ;
|
||||
|
||||
return( edge ) ;
|
||||
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* init_edge()
|
||||
*
|
||||
* Initialize Edge buffer.
|
||||
* This routine must be called in single process state.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
void init_edge(long process_id)
|
||||
{
|
||||
long edge_cnt ;
|
||||
|
||||
/* Initialize global free list */
|
||||
LOCKINIT(global->free_edge_lock);
|
||||
global->free_edge = 0 ;
|
||||
|
||||
/* Allocate locks */
|
||||
for( edge_cnt = 0 ; edge_cnt < MAX_EDGES ; edge_cnt++ )
|
||||
global->edge_buf[ edge_cnt ].edge_lock
|
||||
= get_sharedlock( SHARED_LOCK_SEG0, process_id ) ;
|
||||
|
||||
/* Initialize local free list */
|
||||
sobj_struct[process_id].n_local_free_edge = 0 ;
|
||||
sobj_struct[process_id].local_free_edge = 0 ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
****************************************************************************
|
||||
*
|
||||
* Methods for Shared_Lock
|
||||
*
|
||||
* Some machines provide a limited number of lock variables due to hardware
|
||||
* constraints etc. This package controls the sharing of this limited number
|
||||
* of locks among objects.
|
||||
*
|
||||
****************************************************************************
|
||||
****************************************************************************/
|
||||
/***************************************************************************
|
||||
*
|
||||
* init_sharedlock()
|
||||
*
|
||||
* Initialize shared lock.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
void init_sharedlock(long process_id)
|
||||
{
|
||||
long i ;
|
||||
|
||||
for( i = 0 ; i < MAX_SHARED_LOCK ; i++ )
|
||||
{
|
||||
LOCKINIT(global->sh_lock[i].lock);
|
||||
}
|
||||
|
||||
sobj_struct[process_id].lock_alloc_counter = 0 ;
|
||||
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* get_sharedlock()
|
||||
*
|
||||
* Return a shared lock variable. If SHARED_LOCK_SEG[01] is specified,
|
||||
* the lock is selected from the specified segment. If SHARED_LOCK_SEGANY
|
||||
* is specified, the lock is picked up from arbitrary segment.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
Shared_Lock *get_sharedlock(long segment, long process_id)
|
||||
{
|
||||
Shared_Lock *pshl ;
|
||||
long effective_lock_ctr ;
|
||||
|
||||
/* Compute effective lock allocation counter value */
|
||||
switch( segment )
|
||||
{
|
||||
case SHARED_LOCK_SEG0:
|
||||
effective_lock_ctr = sobj_struct[process_id].lock_alloc_counter % SHARED_LOCK_SEG_SIZE ;
|
||||
break ;
|
||||
case SHARED_LOCK_SEG1:
|
||||
effective_lock_ctr = sobj_struct[process_id].lock_alloc_counter % SHARED_LOCK_SEG_SIZE
|
||||
+ SHARED_LOCK_SEG_SIZE ;
|
||||
break ;
|
||||
default:
|
||||
effective_lock_ctr = sobj_struct[process_id].lock_alloc_counter ;
|
||||
}
|
||||
|
||||
|
||||
/* Get pointer to the lock */
|
||||
pshl = &global->sh_lock[ effective_lock_ctr ] ;
|
||||
|
||||
/* Update the lock counter */
|
||||
sobj_struct[process_id].lock_alloc_counter++ ;
|
||||
if( sobj_struct[process_id].lock_alloc_counter >= MAX_SHARED_LOCK )
|
||||
sobj_struct[process_id].lock_alloc_counter = 0 ;
|
||||
|
||||
return( pshl ) ;
|
||||
}
|
||||
|
34
splash2/codes/apps/radiosity/structs.H
Normal file
34
splash2/codes/apps/radiosity/structs.H
Normal file
|
@ -0,0 +1,34 @@
|
|||
#ifndef _STRUCTS_H
#define _STRUCTS_H

/************************************************************************
 *
 *    Vertex    -  3D coordinate
 *
 *************************************************************************/

typedef struct {
    float x ;
    float y ;
    float z ;
} Vertex;

/************************************************************************
 *
 *    Color (R,G,B)
 *
 *************************************************************************/

typedef struct {
    float r ;
    float g ;
    float b ;
} Rgb;

/************************************************************************
 *
 *    Ray       -  3D coordinate
 *
 *************************************************************************/

typedef struct {
    float x ;
    float y ;
    float z ;
} Ray;

#endif
|
169
splash2/codes/apps/radiosity/task.H
Normal file
169
splash2/codes/apps/radiosity/task.H
Normal file
|
@ -0,0 +1,169 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
|
||||
#ifndef _TASK_H
|
||||
#define _TASK_H
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Constants
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
#define PAGE_SIZE 4096 /* page size of system, used for padding to
|
||||
allow page placement of some logically
|
||||
per-process data structures */
|
||||
|
||||
/*** Task types ***/
|
||||
#define TASK_MODELING (1)
|
||||
#define TASK_BSP (2)
|
||||
#define TASK_FF_REFINEMENT (4)
|
||||
#define TASK_RAY (8)
|
||||
#define TASK_RAD_AVERAGE (16)
|
||||
#define TASK_VISIBILITY (32)
|
||||
|
||||
|
||||
/*** Controlling parallelism ***/
|
||||
|
||||
#define MAX_TASKGET_RETRY (32) /* Max # of retry get_task() can make */
|
||||
#define N_ALLOCATE_LOCAL_TASK (8) /* get_task() and free_task() transfer
|
||||
this # of task objects to/from the
|
||||
global shared queue at a time */
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Task Descriptors
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
/* Decompose modeling object into patches (B-reps) */
|
||||
typedef struct {
|
||||
long type ; /* Object type */
|
||||
Model *model ; /* Object to be decomposed */
|
||||
} Modeling_Task ;
|
||||
|
||||
|
||||
/* Insert a new patch to the BSP tree */
|
||||
typedef struct {
|
||||
Patch *patch ; /* Patch to be inserted */
|
||||
Patch *parent ; /* Parent node in the BSP tree */
|
||||
} BSP_Task ;
|
||||
|
||||
|
||||
/* Refine element interaction based on FF value or BF value */
|
||||
typedef struct {
|
||||
Element *e1, *e2 ; /* Interacting elements */
|
||||
float visibility ; /* Visibility of parent */
|
||||
long level ; /* Path length from the root element */
|
||||
} Refinement_Task ;
|
||||
|
||||
|
||||
typedef struct {
|
||||
long ray_type ;
|
||||
Element *e ; /* The element we are interested in */
|
||||
} Ray_Task ;
|
||||
|
||||
|
||||
typedef struct {
|
||||
Element *e ; /* The element we are interested in */
|
||||
Interaction *inter ; /* Top of interactions */
|
||||
long n_inter ; /* Number of interactions */
|
||||
void (*k)() ; /* Continuation */
|
||||
} Visibility_Task ;
|
||||
|
||||
/* Radiosity averaging task */
|
||||
|
||||
#define RAD_AVERAGING_MODE (0)
|
||||
#define RAD_NORMALIZING_MODE (1)
|
||||
|
||||
typedef struct {
|
||||
Element *e ;
|
||||
long level ;
|
||||
long mode ;
|
||||
} RadAvg_Task ;
|
||||
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Task Definition
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
|
||||
typedef struct _task {
|
||||
long task_type ;
|
||||
struct _task *next ;
|
||||
union {
|
||||
Modeling_Task model ;
|
||||
BSP_Task bsp ;
|
||||
Refinement_Task ref ;
|
||||
Ray_Task ray ;
|
||||
Visibility_Task vis ;
|
||||
RadAvg_Task rad ;
|
||||
} task ;
|
||||
} Task ;
|
||||
|
||||
|
||||
typedef struct {
|
||||
char pad1[PAGE_SIZE]; /* padding to avoid false-sharing
|
||||
and allow page-placement */
|
||||
LOCKDEC(q_lock)
|
||||
Task *top, *tail ;
|
||||
long n_tasks ;
|
||||
LOCKDEC(f_lock)
|
||||
long n_free ;
|
||||
Task *free ;
|
||||
char pad2[PAGE_SIZE]; /* padding to avoid false-sharing
|
||||
and allow page-placement */
|
||||
} Task_Queue ;
|
||||
|
||||
|
||||
#define TASK_APPEND (0)
|
||||
#define TASK_INSERT (1)
|
||||
|
||||
/* Accessors for a Task_Queue pointer q.  The argument is parenthesized
   in every macro so that an expression such as &queue[i] can be passed;
   without the parentheses, &x->n_tasks would parse as &(x->n_tasks). */
#define taskq_length(q)            ((q)->n_tasks)
#define taskq_top(q)               ((q)->top)
/* True when the queue holds more than n_tasks_per_queue tasks */
#define taskq_too_long(q)          ((q)->n_tasks > n_tasks_per_queue)
|
||||
|
||||
/*
|
||||
* taskman.C
|
||||
*/
|
||||
void process_tasks(long process_id);
|
||||
long _process_task_wait_loop(void);
|
||||
void create_modeling_task(Model *model, long type, long process_id);
|
||||
void create_bsp_task(Patch *patch, Patch *parent, long process_id);
|
||||
void create_ff_refine_task(Element *e1, Element *e2, long level, long process_id);
|
||||
void create_ray_task(Element *e, long process_id);
|
||||
void enqueue_ray_task(long qid, Element *e, long mode, long process_id);
|
||||
void create_visibility_tasks(Element *e, void (*k)(), long process_id);
|
||||
void create_radavg_task(Element *e, long mode, long process_id);
|
||||
void enqueue_radavg_task(long qid, Element *e, long mode, long process_id);
|
||||
void enqueue_task(long qid, Task *task, long mode);
|
||||
Task *dequeue_task(long qid, long max_visit, long process_id);
|
||||
Task *get_task(long process_id);
|
||||
void free_task(Task *task, long process_id);
|
||||
void init_taskq(long process_id);
|
||||
long check_task_counter(void);
|
||||
long assign_taskq(long process_id);
|
||||
void print_task(Task *task);
|
||||
void print_taskq(Task_Queue *tq);
|
||||
|
||||
#endif
|
||||
|
778
splash2/codes/apps/radiosity/taskman.C
Normal file
778
splash2/codes/apps/radiosity/taskman.C
Normal file
|
@ -0,0 +1,778 @@
|
|||
/*************************************************************************/
|
||||
/* */
|
||||
/* Copyright (c) 1994 Stanford University */
|
||||
/* */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Permission is given to use, copy, and modify this software for any */
|
||||
/* non-commercial purpose as long as this copyright notice is not */
|
||||
/* removed. All other uses, including redistribution in whole or in */
|
||||
/* part, are forbidden without prior written permission. */
|
||||
/* */
|
||||
/* This software is provided with absolutely no warranty and no */
|
||||
/* support. */
|
||||
/* */
|
||||
/*************************************************************************/
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* Task management.
|
||||
*
|
||||
* This module has the following functions.
|
||||
* (1) Allocate/free a task object.
|
||||
* (2) Enqueue/dequeue a task object.
|
||||
*
|
||||
*************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
EXTERN_ENV;
|
||||
|
||||
include(radiosity.h)
|
||||
|
||||
|
||||
struct {
|
||||
char pad1[PAGE_SIZE]; /* padding to avoid false-sharing
|
||||
and allow page-placement */
|
||||
long n_local_free_task ;
|
||||
Task *local_free_task ;
|
||||
long crnt_taskq_id ;
|
||||
char pad2[PAGE_SIZE]; /* padding to avoid false-sharing
|
||||
and allow page-placement */
|
||||
} task_struct[MAX_PROCESSORS];
|
||||
|
||||
/***************************************************************************
|
||||
****************************************************************************
|
||||
*
|
||||
* Methods for Task object
|
||||
*
|
||||
****************************************************************************
|
||||
****************************************************************************/
|
||||
/***************************************************************************
|
||||
*
|
||||
* process_tasks()
|
||||
*
|
||||
* Process tasks in the task queue. Task type is specified by the mask.
|
||||
* Multiple task types may be specified by bit-oring the task type.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#define QUEUES_VISITED (n_taskqueues)
|
||||
#define DEQUEUE_TASK(q,v,p) (dequeue_task((q),(v),p))
|
||||
|
||||
void process_tasks(long process_id)
|
||||
{
|
||||
Task *t ;
|
||||
|
||||
t = DEQUEUE_TASK( taskqueue_id[process_id], QUEUES_VISITED, process_id ) ;
|
||||
|
||||
retry_entry:
|
||||
while( t )
|
||||
{
|
||||
switch( t->task_type )
|
||||
{
|
||||
case TASK_MODELING:
|
||||
process_model( t->task.model.model, t->task.model.type, process_id ) ;
|
||||
break ;
|
||||
case TASK_BSP:
|
||||
define_patch( t->task.bsp.patch, t->task.bsp.parent, process_id ) ;
|
||||
break ;
|
||||
case TASK_FF_REFINEMENT:
|
||||
ff_refine_elements( t->task.ref.e1, t->task.ref.e2, 0, process_id ) ;
|
||||
break ;
|
||||
case TASK_RAY:
|
||||
process_rays( t->task.ray.e, process_id ) ;
|
||||
break ;
|
||||
case TASK_VISIBILITY:
|
||||
visibility_task( t->task.vis.e, t->task.vis.inter,
|
||||
t->task.vis.n_inter, t->task.vis.k, process_id ) ;
|
||||
break ;
|
||||
case TASK_RAD_AVERAGE:
|
||||
radiosity_averaging( t->task.rad.e, t->task.rad.mode, process_id ) ;
|
||||
break ;
|
||||
default:
|
||||
fprintf( stderr, "Panic:process_tasks:Illegal task type\n" );
|
||||
}
|
||||
|
||||
/* Free the task */
|
||||
free_task( t, process_id ) ;
|
||||
|
||||
/* Get next task */
|
||||
t = DEQUEUE_TASK( taskqueue_id[process_id], QUEUES_VISITED, process_id ) ;
|
||||
}
|
||||
|
||||
|
||||
/* Barrier. While waiting for other processors to finish, poll the task
|
||||
queues and resume processing if there is any task */
|
||||
|
||||
LOCK(global->pbar_lock);
|
||||
/* Reset the barrier counter if not initialized */
|
||||
if( global->pbar_count >= n_processors )
|
||||
global->pbar_count = 0 ;
|
||||
|
||||
/* Increment the counter */
|
||||
global->pbar_count++ ;
|
||||
UNLOCK(global->pbar_lock);
|
||||
|
||||
/* barrier spin-wait loop */
|
||||
while( global->pbar_count < n_processors )
|
||||
{
|
||||
/* Wait for a while and then retry dequeue */
|
||||
if( _process_task_wait_loop() )
|
||||
break ;
|
||||
|
||||
/* Waited for a while but other processors are still running.
|
||||
Poll the task queue again */
|
||||
t = DEQUEUE_TASK( taskqueue_id[process_id], QUEUES_VISITED, process_id ) ;
|
||||
if( t )
|
||||
{
|
||||
/* Task found. Exit the barrier and work on it */
|
||||
LOCK(global->pbar_lock);
|
||||
global->pbar_count-- ;
|
||||
UNLOCK(global->pbar_lock);
|
||||
goto retry_entry ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
BARRIER(global->barrier, n_processors);
|
||||
}
|
||||
|
||||
|
||||
long _process_task_wait_loop()
|
||||
{
|
||||
long i ;
|
||||
long finished = 0 ;
|
||||
|
||||
/* Wait for a while and then retry */
|
||||
for( i = 0 ; i < 1000 && ! finished ; i++ )
|
||||
{
|
||||
if( ((i & 0xff) == 0) && ((volatile long)global->pbar_count >= n_processors) )
|
||||
|
||||
finished = 1 ;
|
||||
}
|
||||
|
||||
return( finished ) ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* create_modeling_task()
|
||||
* create_bsp_task()
|
||||
* create_ff_refine_task()
|
||||
* create_ray_task()
|
||||
* create_visibility_tasks()
|
||||
* create_radavg_task()
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
/*
 * No task object is created: the model is processed immediately by
 * calling process_model().  Kept as a separate routine for historical
 * reasons.
 */
void create_modeling_task(Model *model, long type, long process_id)
{
    process_model( model, type, process_id ) ;
}
|
||||
|
||||
|
||||
/*
 * No task object is created: the patch is handled immediately by
 * calling define_patch().  Kept as a separate routine for historical
 * reasons.
 */
void create_bsp_task(Patch *patch, Patch *parent, long process_id)
{
    define_patch( patch, parent, process_id ) ;
}
|
||||
|
||||
|
||||
|
||||
/*
 * Schedule a form-factor refinement of the element pair (e1, e2).
 * If the task queue of this process is not too long, a
 * TASK_FF_REFINEMENT task is filled in and inserted into that queue;
 * otherwise the refinement is carried out right away by calling
 * ff_refine_elements().
 */
void create_ff_refine_task(Element *e1, Element *e2, long level, long process_id)
{
    Task *task ;

    if( ! taskq_too_long(&global->task_queue[ taskqueue_id[process_id] ]) )
    {
        /* Room in the queue: build the task and enqueue it */
        task = get_task(process_id) ;
        task->task_type      = TASK_FF_REFINEMENT ;
        task->task.ref.e1    = e1 ;
        task->task.ref.e2    = e2 ;
        task->task.ref.level = level ;

        enqueue_task( taskqueue_id[process_id], task, TASK_INSERT ) ;
        return ;
    }

    /* Task queue is too long. Solve it immediately */
    ff_refine_elements( e1, e2, level, process_id ) ;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Schedule ray processing for element e.
 * A TASK_RAY task is inserted into the queue of this process only when
 * the element has at least N_inter_parallel_bf_refine interactions
 * (defined plus visibility-undefined) and the queue is not too long;
 * otherwise process_rays() is called directly.
 */
void create_ray_task(Element *e, long process_id)
{
    if( ! ((e->n_interactions + e->n_vis_undef_inter)
           < N_inter_parallel_bf_refine)
        && ! taskq_too_long(&global->task_queue[ taskqueue_id[process_id] ]) )
    {
        /* Large enough and room in the queue: defer the work */
        enqueue_ray_task( taskqueue_id[process_id], e, TASK_INSERT, process_id ) ;
        return ;
    }

    /* Task size is small, or the queue is too long.
       Solve it immediately */
    process_rays( e, process_id ) ;
}
|
||||
|
||||
|
||||
/*
 * Build a TASK_RAY task for element e and put it into queue qid.
 * mode is passed through to enqueue_task() unchanged.  Only the task
 * type and the element pointer are filled in.
 */
void enqueue_ray_task(long qid, Element *e, long mode, long process_id)
{
    Task *task = get_task(process_id) ;

    task->task_type  = TASK_RAY ;
    task->task.ray.e = e ;

    enqueue_task( qid, task, mode ) ;
}
|
||||
|
||||
|
||||
/*
 * create_visibility_tasks()
 *
 * Arranges for the interactions on e->vis_undef_inter whose visibility is
 * still VISIBILITY_UNDEF to be evaluated.
 *
 *   e           element whose vis-undef interaction list is examined
 *   k           continuation; called here as (*k)(e, process_id) when no
 *               interaction is undefined, otherwise passed on to
 *               visibility_task() or stored in each queued task descriptor
 *   process_id  calling process; selects its task queue and free list
 *
 * Three outcomes:
 *   - no undefined interaction: the continuation is called immediately;
 *   - fewer than N_visibility_per_task undefined ones, or the caller's
 *     queue is too long: visibility_task() is called here for the whole
 *     list;
 *   - otherwise the list is cut into n_tasks consecutive segments and one
 *     TASK_VISIBILITY task is enqueued per segment. The last segment also
 *     takes any interactions that follow the last undefined one.
 */
void create_visibility_tasks(Element *e, void (*k)(), long process_id)
{
    long n_tasks;               /* Number of tasks to create            */
    long remainder;             /* Residue of MOD(total_undefs)         */
    long i_cnt;                 /* Interactions in the current segment  */
    Interaction *top, *tail;    /* First / current node of the segment  */
    Task *t;
    long total_undefs = 0;
    long tasks_created = 0;

    /* Count the hard problems (visibility still undefined). */
    for (top = e->vis_undef_inter; top; top = top->next) {
        if (top->visibility == VISIBILITY_UNDEF)
            total_undefs++;
    }

    if (total_undefs == 0) {
        /* Nothing to compute. Call the continuation immediately. */
        (*k)(e, process_id);
        return;
    }

    /* Check existing parallelism. */
    if ((total_undefs < N_visibility_per_task)
        || taskq_too_long(&global->task_queue[taskqueue_id[process_id]])) {
        /* Task size is small, or the queue is too long: solve it now. */
        visibility_task(e, e->vis_undef_inter,
                        e->n_vis_undef_inter, k, process_id);
        return;
    }

    /* Create multiple tasks. The undefined interactions are divided into
       n_tasks groups by residue number division (Bresenham-style DDA):
       every undefined node adds n_tasks to the residue, and a segment is
       closed each time the residue reaches total_undefs. */
    /* Note: once the first task is enqueued, the vis-undef list may be
       modified while other tasks are being created. So, any information
       that is necessary in the loop must be read from the element and
       saved locally. */
    n_tasks = (total_undefs + N_visibility_per_task - 1)
              / N_visibility_per_task;
    remainder = 0;
    i_cnt = 0;

    for (top = e->vis_undef_inter, tail = top; tail; tail = tail->next) {
        i_cnt++;

        if (tail->visibility != VISIBILITY_UNDEF)
            continue;

        remainder += n_tasks;
        if (remainder < total_undefs)
            continue;

        /* Close the current segment and create a task for it. */
        tasks_created++;

        /* For the last task, append the following (easy) interactions,
           if there are any, so the segments cover the whole list. */
        if (tasks_created >= n_tasks) {
            for (; tail->next; tail = tail->next, i_cnt++)
                ;
        }

        /* Set task descriptor. */
        t = get_task(process_id);
        t->task_type = TASK_VISIBILITY;
        t->task.vis.e = e;
        t->task.vis.inter = top;
        t->task.vis.n_inter = i_cnt;
        t->task.vis.k = k;

        /* Enqueue. */
        enqueue_task(taskqueue_id[process_id], t, TASK_INSERT);

        /* The last segment ends at the end of the list, so nothing is
           left to distribute. Return here rather than reading tail->next
           again: with every task queued, the list is no longer guaranteed
           to be left alone by other processes (see the note above).
           Sequentially this is identical to letting the loop run out. */
        if (tasks_created >= n_tasks)
            return;

        /* Start the next segment and carry the residue over. */
        top = tail->next;
        remainder -= total_undefs;
        i_cnt = 0;
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * create_radavg_task()
 *
 * Schedules radiosity averaging for element e in the given mode.
 *
 * A task is queued on the caller's own queue only when the element has at
 * least N_inter_parallel_bf_refine interactions and that queue is not
 * already too long. Otherwise radiosity_averaging() is called directly.
 */
void create_radavg_task(Element *e, long mode, long process_id)
{
    /* Worth a separate task, and the queue still has room. */
    if ((e->n_interactions >= N_inter_parallel_bf_refine)
        && !(taskq_too_long(&global->task_queue[taskqueue_id[process_id]]))) {
        enqueue_radavg_task(taskqueue_id[process_id], e, mode, process_id);
        return;
    }

    /* Task is too small or the queue is too long: solve it now. */
    radiosity_averaging(e, mode, process_id);
}
|
||||
|
||||
|
||||
/*
 * enqueue_radavg_task()
 *
 * Allocates a task descriptor from process_id's free list, fills it in as
 * a TASK_RAD_AVERAGE job for element e, and inserts it into queue qid.
 *
 * Here mode is recorded inside the task (task.rad.mode). It is not the
 * queueing mode: the task is always enqueued with TASK_INSERT.
 */
void enqueue_radavg_task(long qid, Element *e, long mode, long process_id)
{
    Task *avg_task = get_task(process_id);

    avg_task->task_type = TASK_RAD_AVERAGE;
    avg_task->task.rad.e = e;
    avg_task->task.rad.mode = mode;

    enqueue_task(qid, avg_task, TASK_INSERT);
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* enqueue_task()
|
||||
* dequeue_task()
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * enqueue_task()
 *
 * Adds task to the task queue numbered qid while holding that queue's
 * q_lock.
 *
 *   mode == TASK_APPEND   link the task after the current tail
 *   any other mode        push the task in front of the current top
 *
 * When the queue is empty (tail is null) the task becomes both top and
 * tail regardless of mode, and n_tasks is set to 1. The routine does not
 * touch task->next on the empty-queue and append paths, so the caller is
 * expected to pass a task whose next pointer is already null there.
 */
void enqueue_task(long qid, Task *task, long mode)
{
    Task_Queue *queue = &global->task_queue[qid];

    LOCK(queue->q_lock);

    if (queue->tail == 0) {
        /* Empty queue: the new task is the only entry. */
        queue->top = task;
        queue->tail = task;
        queue->n_tasks = 1;
    } else if (mode == TASK_APPEND) {
        /* Link behind the last entry. */
        queue->tail->next = task;
        queue->tail = task;
        queue->n_tasks++;
    } else {
        /* Link in front of the first entry. */
        task->next = queue->top;
        queue->top = task;
        queue->n_tasks++;
    }

    UNLOCK(queue->q_lock);
}
|
||||
|
||||
|
||||
|
||||
|
||||
Task *dequeue_task(long qid, long max_visit, long process_id)
|
||||
/*
|
||||
* Attempts to dequeue first from the specified queue (qid), but if no
|
||||
* task is found the routine searches max_visit other queues and returns
|
||||
* a task. If a task is taken from another queue, the task is taken from
|
||||
* the tail of the queue (usually, larger amount of work is involved than
|
||||
* the task at the top of the queue and more locality can be exploited
|
||||
* within the stolen task).
|
||||
*/
|
||||
{
|
||||
Task_Queue *tq ;
|
||||
Task *t = 0 ;
|
||||
Task *prev ;
|
||||
long visit_count = 0 ;
|
||||
long sign = -1 ; /* The first retry will go backward */
|
||||
long offset ;
|
||||
|
||||
/* Check number of queues to be visited */
|
||||
if( max_visit > n_taskqueues )
|
||||
max_visit = n_taskqueues ;
|
||||
|
||||
/* Get next task */
|
||||
while( visit_count < max_visit )
|
||||
{
|
||||
/* Select a task queue */
|
||||
tq = &global->task_queue[ qid ] ;
|
||||
|
||||
/* Check the length (test-test&set) */
|
||||
if( tq->n_tasks > 0 )
|
||||
{
|
||||
/* Lock the task queue */
|
||||
LOCK(tq->q_lock);
|
||||
if( tq->top )
|
||||
{
|
||||
if( qid == taskqueue_id[process_id] )
|
||||
{
|
||||
t = tq->top ;
|
||||
tq->top = t->next ;
|
||||
if( tq->top == 0 )
|
||||
tq->tail = 0 ;
|
||||
tq->n_tasks-- ;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Get tail */
|
||||
for( prev = 0, t = tq->top ; t->next ;
|
||||
prev = t, t = t->next ) ;
|
||||
|
||||
if( prev == 0 )
|
||||
tq->top = 0 ;
|
||||
else
|
||||
prev->next = 0 ;
|
||||
tq->tail = prev ;
|
||||
tq->n_tasks-- ;
|
||||
}
|
||||
}
|
||||
/* Unlock the task queue */
|
||||
UNLOCK(tq->q_lock);
|
||||
break ;
|
||||
}
|
||||
|
||||
/* Update visit count */
|
||||
visit_count++ ;
|
||||
|
||||
/* Compute next taskqueue ID */
|
||||
offset = (sign > 0)? visit_count : -visit_count ;
|
||||
sign = -sign ;
|
||||
|
||||
qid += offset ;
|
||||
if( qid < 0 )
|
||||
qid += n_taskqueues ;
|
||||
else if( qid >= n_taskqueues )
|
||||
qid -= n_taskqueues ;
|
||||
}
|
||||
|
||||
return( t ) ;
|
||||
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* get_task() Create a new instance of Task
|
||||
* free_task() Free a Task object
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
Task *get_task(long process_id)
|
||||
{
|
||||
Task *p ;
|
||||
Task_Queue *tq ;
|
||||
long i ;
|
||||
long q_id ;
|
||||
long retry_count = 0 ;
|
||||
|
||||
/* First, check local task queue */
|
||||
if( task_struct[process_id].local_free_task == 0 )
|
||||
{
|
||||
/* If empty, allocate task objects from the shared list */
|
||||
q_id = taskqueue_id[process_id] ;
|
||||
|
||||
while( task_struct[process_id].local_free_task == 0 )
|
||||
{
|
||||
tq = &global->task_queue[ q_id ] ;
|
||||
|
||||
if( tq->n_free > 0 )
|
||||
{
|
||||
LOCK(tq->f_lock);
|
||||
if( tq->free )
|
||||
{
|
||||
/* Scan the free list */
|
||||
for( i = 1, p = tq->free ;
|
||||
(i < N_ALLOCATE_LOCAL_TASK) && p->next ;
|
||||
i++, p = p->next ) ;
|
||||
|
||||
task_struct[process_id].local_free_task = tq->free ;
|
||||
task_struct[process_id].n_local_free_task = i ;
|
||||
tq->free = p->next ;
|
||||
tq->n_free -= i ;
|
||||
p->next = 0 ;
|
||||
UNLOCK(tq->f_lock);
|
||||
break ;
|
||||
}
|
||||
UNLOCK(tq->f_lock);
|
||||
}
|
||||
|
||||
/* Try next task queue */
|
||||
if( ++q_id >= n_taskqueues )
|
||||
q_id = 0 ;
|
||||
|
||||
/* Check retry count */
|
||||
if( ++retry_count > MAX_TASKGET_RETRY )
|
||||
{
|
||||
fprintf( stderr, "Panic(P%ld):No free task\n",
|
||||
process_id ) ;
|
||||
fprintf( stderr, " Local %ld\n", task_struct[process_id].n_local_free_task ) ;
|
||||
fprintf( stderr, " Q0 free %ld\n",
|
||||
global->task_queue[0].n_free ) ;
|
||||
fprintf( stderr, " Q0 task %ld\n",
|
||||
global->task_queue[0].n_tasks ) ;
|
||||
exit(1) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Delete from the queue */
|
||||
p = task_struct[process_id].local_free_task ;
|
||||
task_struct[process_id].local_free_task = p->next ;
|
||||
task_struct[process_id].n_local_free_task-- ;
|
||||
|
||||
/* Clear pointer just in case.. */
|
||||
p->next = 0 ;
|
||||
|
||||
|
||||
return( p ) ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * free_task()
 *
 * Returns task to process_id's local free list.
 *
 * Once the local list holds at least 2 * N_ALLOCATE_LOCAL_TASK entries,
 * its first N_ALLOCATE_LOCAL_TASK entries are moved, under f_lock, to the
 * front of the shared free list of the caller's own task queue.
 */
void free_task(Task *task, long process_id)
{
    Task_Queue *home;
    Task *batch_head;
    Task *batch_tail;
    long n_export;

    /* Push onto the local free list. */
    task->next = task_struct[process_id].local_free_task;
    task_struct[process_id].local_free_task = task;
    task_struct[process_id].n_local_free_task++;

    /* Nothing more to do while the local list is still short. */
    if (task_struct[process_id].n_local_free_task < (N_ALLOCATE_LOCAL_TASK * 2))
        return;

    home = &global->task_queue[taskqueue_id[process_id]];

    /* Mark off the first N_ALLOCATE_LOCAL_TASK nodes of the local list. */
    batch_head = task_struct[process_id].local_free_task;
    batch_tail = batch_head;
    for (n_export = 1; n_export < N_ALLOCATE_LOCAL_TASK; n_export++)
        batch_tail = batch_tail->next;

    /* Detach the batch from the local list. */
    task_struct[process_id].local_free_task = batch_tail->next;
    task_struct[process_id].n_local_free_task -= n_export;

    /* Splice the batch in front of the shared free list. */
    LOCK(home->f_lock);
    batch_tail->next = home->free;
    home->free = batch_head;
    home->n_free += n_export;
    UNLOCK(home->f_lock);
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* init_taskq()
|
||||
*
|
||||
* Initialize task free list and the task queue.
|
||||
* This routine must be called when only one process is active.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
void init_taskq(long process_id)
|
||||
{
|
||||
long i ;
|
||||
long qid ;
|
||||
long task_index = 0 ;
|
||||
long task_per_queue ;
|
||||
long n_tasks ;
|
||||
|
||||
/* Reset task assignment index */
|
||||
task_struct[process_id].crnt_taskq_id = 0 ;
|
||||
|
||||
/* Initialize task queues */
|
||||
task_per_queue = (MAX_TASKS + n_taskqueues - 1) / n_taskqueues ;
|
||||
|
||||
for( qid = 0 ; qid < n_taskqueues ; qid++ )
|
||||
{
|
||||
/* Initialize free list */
|
||||
if (task_index + task_per_queue > MAX_TASKS )
|
||||
n_tasks = MAX_TASKS - task_index ;
|
||||
else
|
||||
n_tasks = task_per_queue ;
|
||||
|
||||
for( i = task_index ; i < task_index + n_tasks - 1 ; i++ )
|
||||
global->task_buf[i].next = &global->task_buf[i+1] ;
|
||||
global->task_buf[ i ].next = 0 ;
|
||||
|
||||
global->task_queue[ qid ].free = &global->task_buf[ task_index ] ;
|
||||
global->task_queue[ qid ].n_free = n_tasks ;
|
||||
|
||||
/* Initialize task queue */
|
||||
global->task_queue[ qid ].top = 0 ;
|
||||
global->task_queue[ qid ].tail = 0 ;
|
||||
global->task_queue[ qid ].n_tasks = 0 ;
|
||||
|
||||
/* Initialize locks */
|
||||
LOCKINIT(global->task_queue[ qid ].q_lock);
|
||||
LOCKINIT(global->task_queue[ qid ].f_lock);
|
||||
|
||||
/* Update index for next queue */
|
||||
task_index += n_tasks ;
|
||||
}
|
||||
|
||||
/* Initialize local free lists */
|
||||
task_struct[process_id].n_local_free_task = 0 ;
|
||||
task_struct[process_id].local_free_task = 0 ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* check_task_counter()
|
||||
*
|
||||
* Check task counter and return TRUE if this is the first task.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
long check_task_counter()
|
||||
{
|
||||
long flag = 0 ;
|
||||
|
||||
|
||||
LOCK(global->task_counter_lock);
|
||||
|
||||
if( global->task_counter == 0 )
|
||||
/* First processor */
|
||||
flag = 1 ;
|
||||
|
||||
global->task_counter++ ;
|
||||
if( global->task_counter >= n_processors )
|
||||
global->task_counter = 0 ;
|
||||
|
||||
UNLOCK(global->task_counter_lock);
|
||||
|
||||
return( flag ) ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* assign_taskq()
|
||||
*
|
||||
* Assign process its task queue.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
long assign_taskq(long process_id)
|
||||
{
|
||||
long qid ;
|
||||
|
||||
qid = task_struct[process_id].crnt_taskq_id++ ;
|
||||
|
||||
if( task_struct[process_id].crnt_taskq_id >= n_taskqueues )
|
||||
task_struct[process_id].crnt_taskq_id = 0 ;
|
||||
|
||||
return( qid ) ;
|
||||
}
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
*
|
||||
* print_task()
|
||||
* print_taskq()
|
||||
*
|
||||
* Print contents of a task.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/*
 * print_task()
 *
 * Prints a one-line description of task on stdout, chosen by its
 * task_type. Ray and visibility tasks also show the sequence number of
 * the patch their element belongs to. A null task prints "Task (NULL)".
 * An unrecognized task_type is reported on stderr instead.
 */
void print_task(Task *task)
{
    if (task == 0) {
        printf("Task (NULL)\n");
        return;
    }

    if (task->task_type == TASK_MODELING) {
        printf("Task (Model)\n");
    } else if (task->task_type == TASK_BSP) {
        printf("Task (BSP)\n");
    } else if (task->task_type == TASK_FF_REFINEMENT) {
        printf("Task (FF Refinement)\n");
    } else if (task->task_type == TASK_RAY) {
        printf("Task (Ray) (patch ID %ld)\n",
               task->task.ray.e->patch->seq_no);
    } else if (task->task_type == TASK_VISIBILITY) {
        printf("Task (Visibility) (patch ID %ld)\n",
               task->task.vis.e->patch->seq_no);
    } else if (task->task_type == TASK_RAD_AVERAGE) {
        printf("Task (RadAvg)\n");
    } else {
        /* Unknown type: report it on the error stream. */
        fprintf(stderr, "Task(Illegal task type %ld)\n", task->task_type);
    }
}
|
||||
|
||||
|
||||
/*
 * print_taskq()
 *
 * Prints the length of task queue tq, then one line per queued task,
 * produced by print_task(), walking from the top of the queue along the
 * next links. No lock is taken here.
 */
void print_taskq(Task_Queue *tq)
{
    Task *cur;

    printf("TaskQ: %ld tasks in the queue\n", taskq_length(tq));

    cur = taskq_top(tq);
    while (cur) {
        printf(" ");
        print_task(cur);
        cur = cur->next;
    }
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue