Compare commits
143 commits
Author | SHA1 | Date | |
---|---|---|---|
c7f92c43da | |||
567a9b0a08 | |||
60075068ea | |||
2f14baaabc | |||
bbdd34d628 | |||
3384caf0fe | |||
4b164f8382 | |||
31199bba53 | |||
9a13acaa36 | |||
3547af6e44 | |||
66a1016a35 | |||
d2254e034e | |||
d7aef8be96 | |||
ba00d7449d | |||
cd9ca71b25 | |||
626a0cc003 | |||
9f3615d9c9 | |||
d82e59f74b | |||
ed44b7f6e2 | |||
03ea796a79 | |||
7d6f2c5050 | |||
7110d2313e | |||
065bb8c655 | |||
a4060ab950 | |||
fcf94310eb | |||
1991ff3bb2 | |||
48333e7e3f | |||
e2805f825a | |||
083cd6da78 | |||
3e200455bd | |||
35cb11f14e | |||
011ddf96fa | |||
691a4574b4 | |||
2c1286865f | |||
6f7bf1b11f | |||
8b85f950e5 | |||
198c515b97 | |||
acce7b0dc0 | |||
e5fe2b82b7 | |||
43418e7f81 | |||
71dd6c2c17 | |||
b2669a7875 | |||
620bf51ae7 | |||
138000796a | |||
b043dcf58a | |||
c07a2d68f3 | |||
846a17308c | |||
9dd54d10ab | |||
82a8230aa7 | |||
746e2f3c27 | |||
46bfc14312 | |||
83cabc6264 | |||
ce2a0076c9 | |||
ceb3bd9733 | |||
0fc9dcf46b | |||
f3e0ac2b06 | |||
efcd8ae024 | |||
93e20c9a73 | |||
2367198921 | |||
073cb26607 | |||
f5656738dc | |||
8602aea4e5 | |||
833fb10ed4 | |||
8d2c3735d9 | |||
60b26f1546 | |||
bec4409add | |||
8b1ba9f99d | |||
a5802c823f | |||
a7a0fd2c58 | |||
6f549419eb | |||
748b87fc36 | |||
a4c4b4188d | |||
41bc2886de | |||
83028e730c | |||
cb97118d91 | |||
e8723310ef | |||
da1ddb9aa7 | |||
a063a14905 | |||
46b4c40277 | |||
767aed4534 | |||
d37bdc7366 | |||
8fe8836eea | |||
3c005c0f0e | |||
2213fba5c5 | |||
f2e2410a50 | |||
184c6d7ebd | |||
912b20d02a | |||
4fc16544af | |||
d3d983caf9 | |||
6c41181b8e | |||
49009f170a | |||
ea8461885f | |||
b792e9e43c | |||
3886c4a8f2 | |||
7b6cf951e2 | |||
96f8ff5702 | |||
a854373d59 | |||
a06a46f5d1 | |||
ddc6931573 | |||
4b8b9c0585 | |||
3bdd58ccb4 | |||
54c478c0b8 | |||
80c17d0a8d | |||
092b06b745 | |||
0edf6dc956 | |||
41beacce08 | |||
d3bfc03688 | |||
ca0fd665dc | |||
94e6126650 | |||
5638a074b9 | |||
72e74aed0a | |||
e9311a59ed | |||
1f1388b6c8 | |||
1be05afa06 | |||
f3643c8a60 | |||
153e5879c6 | |||
87b9f0b87b | |||
76004f08f2 | |||
a309c2f343 | |||
78e4967b6a | |||
0c4a69bcbf | |||
b5045005de | |||
03f740664b | |||
ccd9210e1a | |||
d2b19d2732 | |||
55f5c4dd8a | |||
b25ea094d4 | |||
41a6158954 | |||
164d9bd732 | |||
5fd959260c | |||
f4b14c73fc | |||
0852f0cfc6 | |||
b673f2aaa4 | |||
e07f0c5043 | |||
d728f6786b | |||
f0786704db | |||
653b4657e6 | |||
227bdde922 | |||
e4c57275d3 | |||
c5df9308c9 | |||
e9889c46ed | |||
32d05d5fb6 | |||
2974dc7a37 |
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -13,3 +13,4 @@ m5out
|
||||||
/util/m5/*.o
|
/util/m5/*.o
|
||||||
/util/m5/*.a
|
/util/m5/*.a
|
||||||
/util/m5/m5
|
/util/m5/m5
|
||||||
|
/system/arm/dt/*.dtb
|
||||||
|
|
|
@ -14,3 +14,4 @@ ext/mcpat/regression/*/*.out
|
||||||
util/m5/*.o
|
util/m5/*.o
|
||||||
util/m5/*.a
|
util/m5/*.a
|
||||||
util/m5/m5
|
util/m5/m5
|
||||||
|
system/arm/dt/*.dtb
|
||||||
|
|
337
CONTRIBUTING.md
Normal file
337
CONTRIBUTING.md
Normal file
|
@ -0,0 +1,337 @@
|
||||||
|
Authors: Jason Lowe-Power
|
||||||
|
Andreas Sandberg
|
||||||
|
Steve Reinhardt
|
||||||
|
|
||||||
|
If you've made changes to gem5 that might benefit others, we strongly encourage
|
||||||
|
you to contribute those changes to the public gem5 repository. There are
|
||||||
|
several reasons to do this:
|
||||||
|
* Share your work with others, so that they can benefit from new functionality.
|
||||||
|
* Support the scientific principle by enabling others to evaluate your
|
||||||
|
suggestions without having to guess what you did.
|
||||||
|
* Once your changes are part of the main repo, you no longer have to merge
|
||||||
|
them back in every time you update your local repo. This can be a huge time
|
||||||
|
saving!
|
||||||
|
* Once your code is in the main repo, other people have to make their changes
|
||||||
|
work with your code, and not the other way around.
|
||||||
|
* Others may build on your contributions to make them even better, or extend
|
||||||
|
them in ways you did not have time to do.
|
||||||
|
* You will have the satisfaction of contributing back to the community.
|
||||||
|
|
||||||
|
The main method for contributing code to gem5 is via our code review website:
|
||||||
|
https://gem5-review.googlesource.com/. This document describes the details of
|
||||||
|
how to create code changes, upload your changes, have your changes
|
||||||
|
reviewed, and finally push your changes to gem5. More information can be found
|
||||||
|
from the following sources:
|
||||||
|
* http://gem5.org/Submitting_Contributions
|
||||||
|
* https://gerrit-review.googlesource.com/Documentation/index.html
|
||||||
|
* https://git-scm.com/book
|
||||||
|
|
||||||
|
|
||||||
|
High-level flow for submitting changes
|
||||||
|
======================================
|
||||||
|
|
||||||
|
+-------------+
|
||||||
|
| Make change |
|
||||||
|
+------+------+
|
||||||
|
|
|
||||||
|
|
|
||||||
|
v
|
||||||
|
+------+------+
|
||||||
|
| Post review |
|
||||||
|
+------+------+
|
||||||
|
|
|
||||||
|
v
|
||||||
|
+--------+---------+
|
||||||
|
| Wait for reviews | <--------+
|
||||||
|
+--------+---------+ |
|
||||||
|
| |
|
||||||
|
| |
|
||||||
|
v |
|
||||||
|
+----+----+ No +------+------+
|
||||||
|
|Reviewers+--------->+ Update code |
|
||||||
|
|happy? | +------+------+
|
||||||
|
+----+----+ ^
|
||||||
|
| |
|
||||||
|
| Yes |
|
||||||
|
v |
|
||||||
|
+----+-----+ No |
|
||||||
|
|Maintainer+----------------+
|
||||||
|
|happy? |
|
||||||
|
+----+-----+
|
||||||
|
|
|
||||||
|
| Yes
|
||||||
|
v
|
||||||
|
+------+------+
|
||||||
|
| Submit code |
|
||||||
|
+-------------+
|
||||||
|
|
||||||
|
After creating your change to gem5, you can post a review on our Gerrit
|
||||||
|
code-review site: https://gem5-review.googlesource.com. Before being able to
|
||||||
|
submit your code to the mainline of gem5, the code is reviewed by others in the
|
||||||
|
community. Additionally, the maintainer for that part of the code must sign off
|
||||||
|
on it.
|
||||||
|
|
||||||
|
Cloning the gem5 repo to contribute
|
||||||
|
===================================
|
||||||
|
|
||||||
|
If you plan on contributing, it is strongly encouraged for you to clone the
|
||||||
|
repository directly from our gerrit instance at
|
||||||
|
https://gem5.googlesource.com/.
|
||||||
|
|
||||||
|
To clone the master gem5 repository:
|
||||||
|
> git clone https://gem5.googlesource.com/public/gem5
|
||||||
|
|
||||||
|
Other gem5 repositories
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
There are a few repositories other than the main gem5 development repository.
|
||||||
|
|
||||||
|
* public/m5threads: The code for a pthreads implementation that works with
|
||||||
|
gem5's syscall emulation mode.
|
||||||
|
|
||||||
|
Other gem5 branches
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
None right now.
|
||||||
|
|
||||||
|
Making changes to gem5
|
||||||
|
======================
|
||||||
|
|
||||||
|
It is strongly encouraged to use git branches when making changes to gem5.
|
||||||
|
Additionally, keep changes small and concise, and have only a single logical
|
||||||
|
change per commit.
|
||||||
|
|
||||||
|
Unlike our previous flow with Mercurial and patch queues, when using git, you
|
||||||
|
will be committing changes to your local branch. By using separate branches in
|
||||||
|
git, you will be able to pull in and merge changes from mainline and simply
|
||||||
|
keep up with upstream changes.
|
||||||
|
|
||||||
|
Requirements for change descriptions
|
||||||
|
------------------------------------
|
||||||
|
To help reviewers and future contributors more easily understand and track
|
||||||
|
changes, we require all change descriptions be strictly formatted.
|
||||||
|
|
||||||
|
A canonical commit message consists of three parts:
|
||||||
|
* A short summary line describing the change. This line starts with one or
|
||||||
|
more keywords separated by commas followed by a colon and a description of
|
||||||
|
the change. This line should be no more than 65 characters long since
|
||||||
|
version control systems usually add a prefix that causes line-wrapping for
|
||||||
|
longer lines.
|
||||||
|
* (Optional, but highly recommended) A detailed description. This describes
|
||||||
|
what you have done and why. If the change isn't obvious, you might want to
|
||||||
|
motivate why it is needed. Lines need to be wrapped to 75 characters or
|
||||||
|
less.
|
||||||
|
* Tags describing patch metadata. You are highly recommended to use
|
||||||
|
tags to acknowledge reviewers for their work. Gerrit will automatically add
|
||||||
|
most tags.
|
||||||
|
|
||||||
|
The keyword should be one or more of the following separated by commas:
|
||||||
|
* Architecture name in lower case (e.g., arm or x86): Anything that is
|
||||||
|
target-architecture specific.
|
||||||
|
* base
|
||||||
|
* ext
|
||||||
|
* stats
|
||||||
|
* sim
|
||||||
|
* syscall_emul
|
||||||
|
* config:
|
||||||
|
* mem: Classic memory system. Ruby uses its own keyword.
|
||||||
|
* ruby: Ruby memory models.
|
||||||
|
* cpu: CPU-model specific (except for kvm)
|
||||||
|
* kvm: KVM-specific. Changes to host architecture specific components should
|
||||||
|
include an architecture keyword (e.g., arm or x86) as well.
|
||||||
|
* gpu-compute
|
||||||
|
* energy
|
||||||
|
* dev
|
||||||
|
* arch: General architecture support (src/arch/)
|
||||||
|
* scons: Build-system related. Trivial changes as a side effect of doing
|
||||||
|
something unrelated (e.g., adding a source file to a SConscript) don't
|
||||||
|
require this.
|
||||||
|
* tests
|
||||||
|
* style: Changes to the style checkers or style fixes.
|
||||||
|
* misc
|
||||||
|
|
||||||
|
Tags are an optional mechanism to store additional metadata about a patch and
|
||||||
|
acknowledge people who reported a bug or reviewed that patch. Tags are
|
||||||
|
generally appended to the end of the commit message in the order they happen.
|
||||||
|
We currently use the following tags:
|
||||||
|
* Signed-off-by: Added by the author and the submitter (if different).
|
||||||
|
This tag is a statement saying that you believe the patch to be correct and
|
||||||
|
have the right to submit the patch according to the license in the affected
|
||||||
|
files. Similarly, if you commit someone else's patch, this tells the rest
|
||||||
|
of the world that you have the right to forward it to the main
|
||||||
|
repository. If you need to make any changes at all to submit the change,
|
||||||
|
these should be described within hard brackets just before your
|
||||||
|
Signed-off-by tag. By adding this line, the contributor certifies the
|
||||||
|
contribution is made under the terms of the Developer Certificate of Origin
|
||||||
|
(DCO) [https://developercertificate.org/].
|
||||||
|
* Reviewed-by: Used to acknowledge patch reviewers. It's generally considered
|
||||||
|
good form to add these. Added automatically.
|
||||||
|
* Reported-by: Used to acknowledge someone for finding and reporting a bug.
|
||||||
|
* Reviewed-on: Link to the review request corresponding to this patch. Added
|
||||||
|
automatically.
|
||||||
|
* Change-Id: Used by Gerrit to track changes across rebases. Added
|
||||||
|
automatically with a commit hook by git.
|
||||||
|
* Tested-by: Used to acknowledge people who tested a patch. Sometimes added
|
||||||
|
automatically by review systems that integrate with CI systems.
|
||||||
|
|
||||||
|
Other than the "Signed-off-by", "Reported-by", and "Tested-by" tags, you
|
||||||
|
generally don't need to add these manually as they are added automatically by
|
||||||
|
Gerrit.
|
||||||
|
|
||||||
|
It is encouraged for the author of the patch and the submitter to add a
|
||||||
|
Signed-off-by tag to the commit message. By adding this line, the contributor
|
||||||
|
certifies the contribution is made under the terms of the Developer Certificate
|
||||||
|
of Origin (DCO) [https://developercertificate.org/].
|
||||||
|
|
||||||
|
It is imperative that you use your real name and your real email address in
|
||||||
|
both tags and in the author field of the changeset.
|
||||||
|
|
||||||
|
For significant changes, authors are encouraged to add copyright information
|
||||||
|
and their names at the beginning of the file. The main purpose of the author
|
||||||
|
names on the file is to track who is most knowledgeable about the file (e.g.,
|
||||||
|
who has contributed a significant amount of code to the file).
|
||||||
|
|
||||||
|
Note: If you do not follow these guidelines, the gerrit review site will
|
||||||
|
automatically reject your patch.
|
||||||
|
If this happens, update your changeset descriptions to match the required style
|
||||||
|
and resubmit. The following is a useful git command to update the most recent
|
||||||
|
commit (HEAD).
|
||||||
|
|
||||||
|
> git commit --amend
|
||||||
|
|
||||||
|
Posting a review
|
||||||
|
================
|
||||||
|
|
||||||
|
If you have not signed up for an account on the Gerrit review site
|
||||||
|
(https://gem5-review.googlesource.com), you first have to create an account.
|
||||||
|
|
||||||
|
Setting up an account
|
||||||
|
---------------------
|
||||||
|
1. Go to https://gem5.googlesource.com/
|
||||||
|
2. Click "Sign In" in the upper right corner. Note: You will need a Google
|
||||||
|
account to contribute.
|
||||||
|
3. After signing in, click "Generate Password" and follow the instructions.
|
||||||
|
|
||||||
|
Submitting a change
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
In gerrit, to submit a review request, you can simply push your git commits to
|
||||||
|
a special named branch. For more information on git push see
|
||||||
|
https://git-scm.com/docs/git-push.
|
||||||
|
|
||||||
|
There are three ways to push your changes to gerrit.
|
||||||
|
|
||||||
|
Push change to gerrit review
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
> git push origin HEAD:refs/for/master
|
||||||
|
|
||||||
|
Assuming origin is https://gem5.googlesource.com/public/gem5 and you want to
|
||||||
|
push the changeset at HEAD, this will create a new review request on top of the
|
||||||
|
master branch. More generally,
|
||||||
|
|
||||||
|
> git push <gem5 gerrit instance> <changeset>:refs/for/<branch>
|
||||||
|
|
||||||
|
See https://gerrit-review.googlesource.com/Documentation/user-upload.html for
|
||||||
|
more information.
|
||||||
|
|
||||||
|
Pushing your first change
|
||||||
|
--------------------------
|
||||||
|
The first time you push a change you may get the following error:
|
||||||
|
|
||||||
|
> remote: ERROR: [fb1366b] missing Change-Id in commit message footer
|
||||||
|
> ...
|
||||||
|
|
||||||
|
Within the error message, there is a command line you should run. For every new
|
||||||
|
clone of the git repo, you need to run the following command to automatically
|
||||||
|
insert the change id in the commit (all on one line).
|
||||||
|
|
||||||
|
> curl -Lo `git rev-parse --git-dir`/hooks/commit-msg
|
||||||
|
https://gerrit-review.googlesource.com/tools/hooks/commit-msg ; chmod +x
|
||||||
|
`git rev-parse --git-dir`/hooks/commit-msg
|
||||||
|
|
||||||
|
If you receive the above error, simply run this command and then amend your
|
||||||
|
changeset.
|
||||||
|
|
||||||
|
> git commit --amend
|
||||||
|
|
||||||
|
Push change to gerrit as a draft
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
|
> git push origin HEAD:refs/drafts/master
|
||||||
|
|
||||||
|
Push change bypassing gerrit
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
Only maintainers can bypass gerrit review. This should very rarely be used.
|
||||||
|
|
||||||
|
> git push origin HEAD:refs/heads/master
|
||||||
|
|
||||||
|
Other gerrit push options
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
There are a number of options you can specify when uploading your changes to
|
||||||
|
gerrit (e.g., reviewers, labels). The gerrit documentation has more
|
||||||
|
information.
|
||||||
|
https://gerrit-review.googlesource.com/Documentation/user-upload.html
|
||||||
|
|
||||||
|
|
||||||
|
Reviewing patches
|
||||||
|
=================
|
||||||
|
|
||||||
|
Reviewing patches is done on our gerrit instance at
|
||||||
|
https://gem5-review.googlesource.com/.
|
||||||
|
|
||||||
|
After logging in with your Google account, you will be able to comment, review,
|
||||||
|
and push your own patches as well as review others' patches. All gem5 users are
|
||||||
|
encouraged to review patches. The only requirement to review patches is to be
|
||||||
|
polite and respectful of others.
|
||||||
|
|
||||||
|
There are multiple labels in Gerrit that can be applied to each review detailed
|
||||||
|
below.
|
||||||
|
* Code-review: This is used by any gem5 user to review patches. When reviewing
|
||||||
|
a patch you can give it a score of -2 to +2 with the following semantics.
|
||||||
|
* -2: This blocks the patch. You believe that this patch should never be
|
||||||
|
committed. This label should be very rarely used.
|
||||||
|
* -1: You would prefer this is not merged as is
|
||||||
|
* 0: No score
|
||||||
|
* +1: This patch seems good, but you aren't 100% confident that it should be
|
||||||
|
pushed.
|
||||||
|
* +2: This is a good patch and should be pushed as is.
|
||||||
|
* Maintainer: Currently only PMC members are maintainers. At least one
|
||||||
|
maintainer must review your patch and give it a +1 before it can be merged.
|
||||||
|
* Verified: This is automatically generated from the continuous integration
|
||||||
|
(CI) tests. Each patch must receive at least a +1 from the CI tests before
|
||||||
|
the patch can be merged. The patch will receive a +1 if gem5 builds and
|
||||||
|
runs, and it will receive a +2 if the stats match.
|
||||||
|
* Style-Check: This is automatically generated and tests the patch against the
|
||||||
|
gem5 code style (http://www.gem5.org/Coding_Style). The patch must receive a
|
||||||
|
+1 from the style checker to be pushed.
|
||||||
|
|
||||||
|
Note: Whenever the patch creator updates the patch all reviewers must re-review
|
||||||
|
the patch. There is no longer a "Fix it, then Ship It" option.
|
||||||
|
|
||||||
|
Once you have received reviews for your patch, you will likely need to make
|
||||||
|
changes. To do this, you should update the original git changeset. Then, you
|
||||||
|
can simply push the changeset again to the same Gerrit branch to update the
|
||||||
|
review request.
|
||||||
|
|
||||||
|
> git push origin HEAD:refs/for/master
|
||||||
|
|
||||||
|
Note: If you have posted a patch and don't receive any reviews, you may need to
|
||||||
|
prod the reviewers. You can do this by adding a reply to your changeset review
|
||||||
|
on gerrit. It is expected that at least the maintainer will supply a review for
|
||||||
|
your patch.
|
||||||
|
|
||||||
|
Committing changes
|
||||||
|
==================
|
||||||
|
|
||||||
|
Each patch must meet the following criteria to be merged:
|
||||||
|
* At least one review with +2
|
||||||
|
* At least one maintainer with +1
|
||||||
|
* At least +1 from the CI tests (gem5 must build and run)
|
||||||
|
* At least +1 from the style checker
|
||||||
|
|
||||||
|
Once a patch meets the above criteria, the submitter of the patch will be able
|
||||||
|
to merge the patch by pressing the "Submit" button on Gerrit. When the patch is
|
||||||
|
submitted, it is merged into the public gem5 branch.
|
1
COPYING
1
COPYING
|
@ -45,3 +45,4 @@ Copyright (c) 1994-1996 Carnegie-Mellon University.
|
||||||
Copyright (c) 1993-1994 Christopher G. Demetriou
|
Copyright (c) 1993-1994 Christopher G. Demetriou
|
||||||
Copyright (c) 1997-2002 Makoto Matsumoto and Takuji Nishimura
|
Copyright (c) 1997-2002 Makoto Matsumoto and Takuji Nishimura
|
||||||
Copyright (c) 1998,2001 Manuel Bouyer.
|
Copyright (c) 1998,2001 Manuel Bouyer.
|
||||||
|
Copyright (c) 2016-2017 Google Inc.
|
||||||
|
|
102
SConstruct
102
SConstruct
|
@ -268,10 +268,17 @@ against the gem5 style rules on %s.
|
||||||
This script will now install the hook in your %s.
|
This script will now install the hook in your %s.
|
||||||
Press enter to continue, or ctrl-c to abort: """
|
Press enter to continue, or ctrl-c to abort: """
|
||||||
|
|
||||||
mercurial_style_message = style_message % ("hg commit and qrefresh commands",
|
mercurial_style_message = """
|
||||||
".hg/hgrc file")
|
You're missing the gem5 style hook, which automatically checks your code
|
||||||
git_style_message = style_message % ("'git commit'",
|
against the gem5 style rules on hg commit and qrefresh commands.
|
||||||
".git/hooks/ directory")
|
This script will now install the hook in your .hg/hgrc file.
|
||||||
|
Press enter to continue, or ctrl-c to abort: """
|
||||||
|
|
||||||
|
git_style_message = """
|
||||||
|
You're missing the gem5 style or commit message hook. These hooks help
|
||||||
|
to ensure that your code follows gem5's style rules on git commit.
|
||||||
|
This script will now install the hook in your .git/hooks/ directory.
|
||||||
|
Press enter to continue, or ctrl-c to abort: """
|
||||||
|
|
||||||
mercurial_style_upgrade_message = """
|
mercurial_style_upgrade_message = """
|
||||||
Your Mercurial style hooks are not up-to-date. This script will now
|
Your Mercurial style hooks are not up-to-date. This script will now
|
||||||
|
@ -376,10 +383,43 @@ def install_git_style_hooks():
|
||||||
return
|
return
|
||||||
|
|
||||||
git_hooks = gitdir.Dir("hooks")
|
git_hooks = gitdir.Dir("hooks")
|
||||||
git_pre_commit_hook = git_hooks.File("pre-commit")
|
def hook_exists(hook_name):
|
||||||
git_style_script = File("util/git-pre-commit.py")
|
hook = git_hooks.File(hook_name)
|
||||||
|
return hook.exists()
|
||||||
|
|
||||||
if git_pre_commit_hook.exists():
|
def hook_install(hook_name, script):
|
||||||
|
hook = git_hooks.File(hook_name)
|
||||||
|
if hook.exists():
|
||||||
|
print "Warning: Can't install %s, hook already exists." % hook_name
|
||||||
|
return
|
||||||
|
|
||||||
|
if hook.islink():
|
||||||
|
print "Warning: Removing broken symlink for hook %s." % hook_name
|
||||||
|
os.unlink(hook.get_abspath())
|
||||||
|
|
||||||
|
if not git_hooks.exists():
|
||||||
|
mkdir(git_hooks.get_abspath())
|
||||||
|
git_hooks.clear()
|
||||||
|
|
||||||
|
abs_symlink_hooks = git_hooks.islink() and \
|
||||||
|
os.path.isabs(os.readlink(git_hooks.get_abspath()))
|
||||||
|
|
||||||
|
# Use a relative symlink if the hooks live in the source directory,
|
||||||
|
# and the hooks directory is not a symlink to an absolute path.
|
||||||
|
if hook.is_under(main.root) and not abs_symlink_hooks:
|
||||||
|
script_path = os.path.relpath(
|
||||||
|
os.path.realpath(script.get_abspath()),
|
||||||
|
os.path.realpath(hook.Dir(".").get_abspath()))
|
||||||
|
else:
|
||||||
|
script_path = script.get_abspath()
|
||||||
|
|
||||||
|
try:
|
||||||
|
os.symlink(script_path, hook.get_abspath())
|
||||||
|
except:
|
||||||
|
print "Error updating git %s hook" % hook_name
|
||||||
|
raise
|
||||||
|
|
||||||
|
if hook_exists("pre-commit") and hook_exists("commit-msg"):
|
||||||
return
|
return
|
||||||
|
|
||||||
print git_style_message,
|
print git_style_message,
|
||||||
|
@ -389,22 +429,11 @@ def install_git_style_hooks():
|
||||||
print "Input exception, exiting scons.\n"
|
print "Input exception, exiting scons.\n"
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
if not git_hooks.exists():
|
git_style_script = File("util/git-pre-commit.py")
|
||||||
mkdir(git_hooks.get_abspath())
|
git_msg_script = File("ext/git-commit-msg")
|
||||||
|
|
||||||
# Use a relative symlink if the hooks live in the source directory
|
hook_install("pre-commit", git_style_script)
|
||||||
if git_pre_commit_hook.is_under(main.root):
|
hook_install("commit-msg", git_msg_script)
|
||||||
script_path = os.path.relpath(
|
|
||||||
git_style_script.get_abspath(),
|
|
||||||
git_pre_commit_hook.Dir(".").get_abspath())
|
|
||||||
else:
|
|
||||||
script_path = git_style_script.get_abspath()
|
|
||||||
|
|
||||||
try:
|
|
||||||
os.symlink(script_path, git_pre_commit_hook.get_abspath())
|
|
||||||
except:
|
|
||||||
print "Error updating git pre-commit hook"
|
|
||||||
raise
|
|
||||||
|
|
||||||
# Try to wire up git to the style hooks
|
# Try to wire up git to the style hooks
|
||||||
if not ignore_style and main.root.Entry(".git").exists():
|
if not ignore_style and main.root.Entry(".git").exists():
|
||||||
|
@ -651,10 +680,13 @@ if main['GCC'] or main['CLANG']:
|
||||||
main.Append(CCFLAGS=['-fno-strict-aliasing'])
|
main.Append(CCFLAGS=['-fno-strict-aliasing'])
|
||||||
# Enable -Wall and -Wextra and then disable the few warnings that
|
# Enable -Wall and -Wextra and then disable the few warnings that
|
||||||
# we consistently violate
|
# we consistently violate
|
||||||
# main.Append(CCFLAGS=['-Wall', '-Wundef', '-Wextra',
|
main.Append(CCFLAGS=['-Wall', '-Wundef', '-Wextra',
|
||||||
# '-Wno-sign-compare', '-Wno-unused-parameter'])
|
'-Wno-sign-compare', '-Wno-unused-parameter'])
|
||||||
# We always compile using C++11
|
# We always compile using C++11
|
||||||
main.Append(CXXFLAGS=['-std=c++11'])
|
main.Append(CXXFLAGS=['-std=c++11'])
|
||||||
|
if sys.platform.startswith('freebsd'):
|
||||||
|
main.Append(CCFLAGS=['-I/usr/local/include'])
|
||||||
|
main.Append(CXXFLAGS=['-I/usr/local/include'])
|
||||||
else:
|
else:
|
||||||
print termcap.Yellow + termcap.Bold + 'Error' + termcap.Normal,
|
print termcap.Yellow + termcap.Bold + 'Error' + termcap.Normal,
|
||||||
print "Don't know what compiler options to use for your compiler."
|
print "Don't know what compiler options to use for your compiler."
|
||||||
|
@ -690,7 +722,8 @@ if main['GCC']:
|
||||||
# to avoid performance penalties on certain AMD chips. Older
|
# to avoid performance penalties on certain AMD chips. Older
|
||||||
# assemblers detect this as an error, "Error: expecting string
|
# assemblers detect this as an error, "Error: expecting string
|
||||||
# instruction after `rep'"
|
# instruction after `rep'"
|
||||||
as_version_raw = readCommand([main['AS'], '-v', '/dev/null'],
|
as_version_raw = readCommand([main['AS'], '-v', '/dev/null',
|
||||||
|
'-o', '/dev/null'],
|
||||||
exception=False).split()
|
exception=False).split()
|
||||||
|
|
||||||
# version strings may contain extra distro-specific
|
# version strings may contain extra distro-specific
|
||||||
|
@ -771,6 +804,10 @@ elif main['CLANG']:
|
||||||
main.Append(CXXFLAGS=['-stdlib=libc++'])
|
main.Append(CXXFLAGS=['-stdlib=libc++'])
|
||||||
main.Append(LIBS=['c++'])
|
main.Append(LIBS=['c++'])
|
||||||
|
|
||||||
|
# On FreeBSD we need libthr.
|
||||||
|
if sys.platform.startswith('freebsd'):
|
||||||
|
main.Append(LIBS=['thr'])
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print termcap.Yellow + termcap.Bold + 'Error' + termcap.Normal,
|
print termcap.Yellow + termcap.Bold + 'Error' + termcap.Normal,
|
||||||
print "Don't know what compiler options to use for your compiler."
|
print "Don't know what compiler options to use for your compiler."
|
||||||
|
@ -884,8 +921,12 @@ main.Append(SWIGFLAGS=swig_flags)
|
||||||
# Check for 'timeout' from GNU coreutils. If present, regressions will
|
# Check for 'timeout' from GNU coreutils. If present, regressions will
|
||||||
# be run with a time limit. We require version 8.13 since we rely on
|
# be run with a time limit. We require version 8.13 since we rely on
|
||||||
# support for the '--foreground' option.
|
# support for the '--foreground' option.
|
||||||
timeout_lines = readCommand(['timeout', '--version'],
|
if sys.platform.startswith('freebsd'):
|
||||||
exception='').splitlines()
|
timeout_lines = readCommand(['gtimeout', '--version'],
|
||||||
|
exception='').splitlines()
|
||||||
|
else:
|
||||||
|
timeout_lines = readCommand(['timeout', '--version'],
|
||||||
|
exception='').splitlines()
|
||||||
# Get the first line and tokenize it
|
# Get the first line and tokenize it
|
||||||
timeout_version = timeout_lines[0].split() if timeout_lines else []
|
timeout_version = timeout_lines[0].split() if timeout_lines else []
|
||||||
main['TIMEOUT'] = timeout_version and \
|
main['TIMEOUT'] = timeout_version and \
|
||||||
|
@ -1009,7 +1050,7 @@ if not GetOption('without_python'):
|
||||||
main.Append(LINKFLAGS=[lib])
|
main.Append(LINKFLAGS=[lib])
|
||||||
else:
|
else:
|
||||||
lib = lib[2:]
|
lib = lib[2:]
|
||||||
if lib not in py_libs and lib != 'dl':
|
if lib not in py_libs:
|
||||||
py_libs.append(lib)
|
py_libs.append(lib)
|
||||||
|
|
||||||
# verify that this stuff works
|
# verify that this stuff works
|
||||||
|
@ -1083,6 +1124,11 @@ backtrace_impls = [ "none" ]
|
||||||
if conf.CheckLibWithHeader(None, 'execinfo.h', 'C',
|
if conf.CheckLibWithHeader(None, 'execinfo.h', 'C',
|
||||||
'backtrace_symbols_fd((void*)0, 0, 0);'):
|
'backtrace_symbols_fd((void*)0, 0, 0);'):
|
||||||
backtrace_impls.append("glibc")
|
backtrace_impls.append("glibc")
|
||||||
|
elif conf.CheckLibWithHeader('execinfo', 'execinfo.h', 'C',
|
||||||
|
'backtrace_symbols_fd((void*)0, 0, 0);'):
|
||||||
|
# NetBSD and FreeBSD need libexecinfo.
|
||||||
|
backtrace_impls.append("glibc")
|
||||||
|
main.Append(LIBS=['execinfo'])
|
||||||
|
|
||||||
if backtrace_impls[-1] == "none":
|
if backtrace_impls[-1] == "none":
|
||||||
default_backtrace_impl = "none"
|
default_backtrace_impl = "none"
|
||||||
|
|
|
@ -58,7 +58,7 @@
|
||||||
# serial links, the main internal crossbar, and an external hmc controller.
|
# serial links, the main internal crossbar, and an external hmc controller.
|
||||||
#
|
#
|
||||||
# - VAULT CONTROLLERS:
|
# - VAULT CONTROLLERS:
|
||||||
# Instances of the HMC_2500_x32 class with their functionality specified in
|
# Instances of the HMC_2500_1x32 class with their functionality specified in
|
||||||
# dram_ctrl.cc
|
# dram_ctrl.cc
|
||||||
#
|
#
|
||||||
# - THE MAIN XBAR:
|
# - THE MAIN XBAR:
|
||||||
|
|
|
@ -152,7 +152,7 @@ def config_mem(options, system):
|
||||||
them.
|
them.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if ( options.mem_type == "HMC_2500_x32"):
|
if ( options.mem_type == "HMC_2500_1x32"):
|
||||||
HMChost = HMC.config_host_hmc(options, system)
|
HMChost = HMC.config_host_hmc(options, system)
|
||||||
HMC.config_hmc(options, system, HMChost.hmc_host)
|
HMC.config_hmc(options, system, HMChost.hmc_host)
|
||||||
subsystem = system.hmc_dev
|
subsystem = system.hmc_dev
|
||||||
|
@ -163,7 +163,7 @@ def config_mem(options, system):
|
||||||
|
|
||||||
if options.tlm_memory:
|
if options.tlm_memory:
|
||||||
system.external_memory = m5.objects.ExternalSlave(
|
system.external_memory = m5.objects.ExternalSlave(
|
||||||
port_type="tlm",
|
port_type="tlm_slave",
|
||||||
port_data=options.tlm_memory,
|
port_data=options.tlm_memory,
|
||||||
port=system.membus.master,
|
port=system.membus.master,
|
||||||
addr_ranges=system.mem_ranges)
|
addr_ranges=system.mem_ranges)
|
||||||
|
@ -223,7 +223,7 @@ def config_mem(options, system):
|
||||||
|
|
||||||
# Connect the controllers to the membus
|
# Connect the controllers to the membus
|
||||||
for i in xrange(len(subsystem.mem_ctrls)):
|
for i in xrange(len(subsystem.mem_ctrls)):
|
||||||
if (options.mem_type == "HMC_2500_x32"):
|
if (options.mem_type == "HMC_2500_1x32"):
|
||||||
subsystem.mem_ctrls[i].port = xbar[i/4].master
|
subsystem.mem_ctrls[i].port = xbar[i/4].master
|
||||||
else:
|
else:
|
||||||
subsystem.mem_ctrls[i].port = xbar.master
|
subsystem.mem_ctrls[i].port = xbar.master
|
||||||
|
|
|
@ -77,7 +77,7 @@ def addNoISAOptions(parser):
|
||||||
parser.add_option("--list-mem-types",
|
parser.add_option("--list-mem-types",
|
||||||
action="callback", callback=_listMemTypes,
|
action="callback", callback=_listMemTypes,
|
||||||
help="List available memory types")
|
help="List available memory types")
|
||||||
parser.add_option("--mem-type", type="choice", default="DDR3_1600_x64",
|
parser.add_option("--mem-type", type="choice", default="DDR3_1600_8x8",
|
||||||
choices=MemConfig.mem_names(),
|
choices=MemConfig.mem_names(),
|
||||||
help = "type of memory to use")
|
help = "type of memory to use")
|
||||||
parser.add_option("--mem-channels", type="int", default=1,
|
parser.add_option("--mem-channels", type="int", default=1,
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
# Copyright (c) 2012, 2015 ARM Limited
|
# Copyright (c) 2012, 2015 ARM Limited
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
#
|
#
|
||||||
|
# Copyright (c) 2017, Centre National de la Recherche Scientifique (CNRS)
|
||||||
|
#
|
||||||
# The license below extends only to copyright in the software and shall
|
# The license below extends only to copyright in the software and shall
|
||||||
# not be construed as granting a license to any other intellectual
|
# not be construed as granting a license to any other intellectual
|
||||||
# property including but not limited to intellectual property relating
|
# property including but not limited to intellectual property relating
|
||||||
|
@ -34,10 +36,12 @@
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#
|
#
|
||||||
# Authors: Andreas Sandberg
|
# Authors: Andreas Sandberg
|
||||||
|
# Pierre-Yves Peneau
|
||||||
|
|
||||||
import m5.objects
|
import m5.objects
|
||||||
import inspect
|
import inspect
|
||||||
import sys
|
import sys
|
||||||
|
from m5.util import fatal
|
||||||
from textwrap import TextWrapper
|
from textwrap import TextWrapper
|
||||||
|
|
||||||
# Dictionary of mapping names of real CPU models to classes.
|
# Dictionary of mapping names of real CPU models to classes.
|
||||||
|
@ -74,8 +78,7 @@ def get(name):
|
||||||
try:
|
try:
|
||||||
return _platform_classes[real_name]
|
return _platform_classes[real_name]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
print "%s is not a valid Platform model." % (name,)
|
fatal("%s is not a valid Platform model." % (name,))
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
def print_platform_list():
|
def print_platform_list():
|
||||||
"""Print a list of available Platform classes including their aliases."""
|
"""Print a list of available Platform classes including their aliases."""
|
||||||
|
|
|
@ -131,8 +131,8 @@ class Benchmark(object):
|
||||||
|
|
||||||
func(self, isa, os)
|
func(self, isa, os)
|
||||||
|
|
||||||
def makeLiveProcessArgs(self, **kwargs):
|
def makeProcessArgs(self, **kwargs):
|
||||||
# set up default args for LiveProcess object
|
# set up default args for Process object
|
||||||
process_args = {}
|
process_args = {}
|
||||||
process_args['cmd'] = [ self.name ] + self.args
|
process_args['cmd'] = [ self.name ] + self.args
|
||||||
process_args['executable'] = self.executable
|
process_args['executable'] = self.executable
|
||||||
|
@ -147,11 +147,11 @@ class Benchmark(object):
|
||||||
|
|
||||||
return process_args
|
return process_args
|
||||||
|
|
||||||
def makeLiveProcess(self, **kwargs):
|
def makeProcess(self, **kwargs):
|
||||||
process_args = self.makeLiveProcessArgs(**kwargs)
|
process_args = self.makeProcessArgs(**kwargs)
|
||||||
|
|
||||||
# figure out working directory: use m5's outdir unless
|
# figure out working directory: use m5's outdir unless
|
||||||
# overridden by LiveProcess's cwd param
|
# overridden by Process's cwd param
|
||||||
cwd = process_args.get('cwd')
|
cwd = process_args.get('cwd')
|
||||||
|
|
||||||
if not cwd:
|
if not cwd:
|
||||||
|
@ -163,9 +163,9 @@ class Benchmark(object):
|
||||||
# copy input files to working directory
|
# copy input files to working directory
|
||||||
for d in self.inputs_dir:
|
for d in self.inputs_dir:
|
||||||
copyfiles(d, cwd)
|
copyfiles(d, cwd)
|
||||||
# generate LiveProcess object
|
# generate Process object
|
||||||
from m5.objects import LiveProcess
|
from m5.objects import Process
|
||||||
return LiveProcess(**process_args)
|
return Process(**process_args)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.name
|
return self.name
|
||||||
|
@ -750,5 +750,5 @@ if __name__ == '__main__':
|
||||||
print 'class: %s' % bench.__name__
|
print 'class: %s' % bench.__name__
|
||||||
x = bench('alpha', 'tru64', input_set)
|
x = bench('alpha', 'tru64', input_set)
|
||||||
print '%s: %s' % (x, input_set)
|
print '%s: %s' % (x, input_set)
|
||||||
pprint(x.makeLiveProcessArgs())
|
pprint(x.makeProcessArgs())
|
||||||
print
|
print
|
||||||
|
|
20
configs/dist/sw.py
vendored
20
configs/dist/sw.py
vendored
|
@ -63,13 +63,17 @@ def build_switch(options):
|
||||||
link.int0 = switch.interface[i]
|
link.int0 = switch.interface[i]
|
||||||
|
|
||||||
return switch
|
return switch
|
||||||
# Add options
|
|
||||||
parser = optparse.OptionParser()
|
|
||||||
Options.addCommonOptions(parser)
|
|
||||||
Options.addFSOptions(parser)
|
|
||||||
(options, args) = parser.parse_args()
|
|
||||||
|
|
||||||
system = build_switch(options)
|
def main():
|
||||||
root = Root(full_system = True, system = system)
|
# Add options
|
||||||
Simulation.run(options, root, None, None)
|
parser = optparse.OptionParser()
|
||||||
|
Options.addCommonOptions(parser)
|
||||||
|
Options.addFSOptions(parser)
|
||||||
|
(options, args) = parser.parse_args()
|
||||||
|
|
||||||
|
system = build_switch(options)
|
||||||
|
root = Root(full_system = True, system = system)
|
||||||
|
Simulation.run(options, root, None, None)
|
||||||
|
|
||||||
|
if __name__ == "__m5_main__":
|
||||||
|
main()
|
||||||
|
|
|
@ -80,7 +80,7 @@ except:
|
||||||
|
|
||||||
parser = optparse.OptionParser()
|
parser = optparse.OptionParser()
|
||||||
|
|
||||||
parser.add_option("--mem-type", type="choice", default="DDR3_1600_x64",
|
parser.add_option("--mem-type", type="choice", default="DDR3_1600_8x8",
|
||||||
choices=MemConfig.mem_names(),
|
choices=MemConfig.mem_names(),
|
||||||
help = "type of memory to use")
|
help = "type of memory to use")
|
||||||
parser.add_option("--mem-size", action="store", type="string",
|
parser.add_option("--mem-size", action="store", type="string",
|
||||||
|
@ -252,6 +252,7 @@ system.tgen = TrafficGen(config_file = cfg_file_name,
|
||||||
|
|
||||||
# add a communication monitor
|
# add a communication monitor
|
||||||
system.monitor = CommMonitor()
|
system.monitor = CommMonitor()
|
||||||
|
system.monitor.footprint = MemFootprintProbe()
|
||||||
|
|
||||||
# connect the traffic generator to the system
|
# connect the traffic generator to the system
|
||||||
system.tgen.port = system.monitor.slave
|
system.tgen.port = system.monitor.slave
|
||||||
|
|
|
@ -53,8 +53,8 @@ from common import MemConfig
|
||||||
|
|
||||||
parser = optparse.OptionParser()
|
parser = optparse.OptionParser()
|
||||||
|
|
||||||
# Use a single-channel DDR3-1600 x64 by default
|
# Use a single-channel DDR3-1600 x64 (8x8 topology) by default
|
||||||
parser.add_option("--mem-type", type="choice", default="DDR3_1600_x64",
|
parser.add_option("--mem-type", type="choice", default="DDR3_1600_8x8",
|
||||||
choices=MemConfig.mem_names(),
|
choices=MemConfig.mem_names(),
|
||||||
help = "type of memory to use")
|
help = "type of memory to use")
|
||||||
|
|
||||||
|
|
|
@ -392,9 +392,9 @@ else:
|
||||||
# OpenCL driver
|
# OpenCL driver
|
||||||
driver = ClDriver(filename="hsa", codefile=kernel_files)
|
driver = ClDriver(filename="hsa", codefile=kernel_files)
|
||||||
for cpu in cpu_list:
|
for cpu in cpu_list:
|
||||||
cpu.workload = LiveProcess(executable = executable,
|
cpu.workload = Process(executable = executable,
|
||||||
cmd = [options.cmd] + options.options.split(),
|
cmd = [options.cmd] + options.options.split(),
|
||||||
drivers = [driver])
|
drivers = [driver])
|
||||||
for cp in cp_list:
|
for cp in cp_list:
|
||||||
cp.workload = host_cpu.workload
|
cp.workload = host_cpu.workload
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# Copyright (c) 2016 ARM Limited
|
# Copyright (c) 2016-2017 ARM Limited
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
#
|
#
|
||||||
# The license below extends only to copyright in the software and shall
|
# The license below extends only to copyright in the software and shall
|
||||||
|
@ -44,6 +44,8 @@ m5.util.addToPath('../../')
|
||||||
from common.Caches import *
|
from common.Caches import *
|
||||||
from common import CpuConfig
|
from common import CpuConfig
|
||||||
|
|
||||||
|
have_kvm = "kvm" in CpuConfig.cpu_names()
|
||||||
|
|
||||||
class L1I(L1_ICache):
|
class L1I(L1_ICache):
|
||||||
tag_latency = 1
|
tag_latency = 1
|
||||||
data_latency = 1
|
data_latency = 1
|
||||||
|
@ -170,6 +172,14 @@ class AtomicCluster(CpuCluster):
|
||||||
def addL1(self):
|
def addL1(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class KvmCluster(CpuCluster):
|
||||||
|
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
|
||||||
|
cpu_config = [ CpuConfig.get("kvm"), None, None, None, None ]
|
||||||
|
super(KvmCluster, self).__init__(system, num_cpus, cpu_clock,
|
||||||
|
cpu_voltage, *cpu_config)
|
||||||
|
def addL1(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class SimpleSystem(LinuxArmSystem):
|
class SimpleSystem(LinuxArmSystem):
|
||||||
cache_line_size = 64
|
cache_line_size = 64
|
||||||
|
|
140
configs/example/arm/dist_bigLITTLE.py
Normal file
140
configs/example/arm/dist_bigLITTLE.py
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
# Copyright (c) 2016-2017 ARM Limited
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# The license below extends only to copyright in the software and shall
|
||||||
|
# not be construed as granting a license to any other intellectual
|
||||||
|
# property including but not limited to intellectual property relating
|
||||||
|
# to a hardware implementation of the functionality of the software
|
||||||
|
# licensed hereunder. You may use the software subject to the license
|
||||||
|
# terms below provided that you ensure that this notice is replicated
|
||||||
|
# unmodified and in its entirety in all distributions of the software,
|
||||||
|
# modified or unmodified, in source code or in binary form.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are
|
||||||
|
# met: redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer;
|
||||||
|
# redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in the
|
||||||
|
# documentation and/or other materials provided with the distribution;
|
||||||
|
# neither the name of the copyright holders nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived from
|
||||||
|
# this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
#
|
||||||
|
# Authors: Gabor Dozsa
|
||||||
|
|
||||||
|
# This configuration file extends the example ARM big.LITTLE(tm)
|
||||||
|
# configuration to enable dist-gem5 simulations of big.LITTLE systems.
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
|
||||||
|
import m5
|
||||||
|
from m5.objects import *
|
||||||
|
|
||||||
|
import fs_bigLITTLE as bL
|
||||||
|
m5.util.addToPath("../../dist")
|
||||||
|
import sw
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
|
||||||
|
# Options for distributed simulation (i.e. dist-gem5)
|
||||||
|
parser.add_argument("--dist", action="store_true", help="Distributed gem5"\
|
||||||
|
" simulation.")
|
||||||
|
parser.add_argument("--is-switch", action="store_true",
|
||||||
|
help="Select the network switch simulator process for"\
|
||||||
|
" a distributed gem5 run.")
|
||||||
|
parser.add_argument("--dist-rank", default=0, action="store", type=int,
|
||||||
|
help="Rank of this system within the dist gem5 run.")
|
||||||
|
parser.add_argument("--dist-size", default=0, action="store", type=int,
|
||||||
|
help="Number of gem5 processes within the dist gem5"\
|
||||||
|
" run.")
|
||||||
|
parser.add_argument("--dist-server-name",
|
||||||
|
default="127.0.0.1",
|
||||||
|
action="store", type=str,
|
||||||
|
help="Name of the message server host\nDEFAULT:"\
|
||||||
|
" localhost")
|
||||||
|
parser.add_argument("--dist-server-port",
|
||||||
|
default=2200,
|
||||||
|
action="store", type=int,
|
||||||
|
help="Message server listen port\nDEFAULT: 2200")
|
||||||
|
parser.add_argument("--dist-sync-repeat",
|
||||||
|
default="0us",
|
||||||
|
action="store", type=str,
|
||||||
|
help="Repeat interval for synchronisation barriers"\
|
||||||
|
" among dist-gem5 processes\nDEFAULT:"\
|
||||||
|
" --ethernet-linkdelay")
|
||||||
|
parser.add_argument("--dist-sync-start",
|
||||||
|
default="1000000000000t",
|
||||||
|
action="store", type=str,
|
||||||
|
help="Time to schedule the first dist synchronisation"\
|
||||||
|
" barrier\nDEFAULT:1000000000000t")
|
||||||
|
parser.add_argument("--ethernet-linkspeed", default="10Gbps",
|
||||||
|
action="store", type=str,
|
||||||
|
help="Link speed in bps\nDEFAULT: 10Gbps")
|
||||||
|
parser.add_argument("--ethernet-linkdelay", default="10us",
|
||||||
|
action="store", type=str,
|
||||||
|
help="Link delay in seconds\nDEFAULT: 10us")
|
||||||
|
parser.add_argument("--etherdump", action="store", type=str, default="",
|
||||||
|
help="Specify the filename to dump a pcap capture of"\
|
||||||
|
" the ethernet traffic")
|
||||||
|
# Used by util/dist/gem5-dist.sh
|
||||||
|
parser.add_argument("--checkpoint-dir", type=str,
|
||||||
|
default=m5.options.outdir,
|
||||||
|
help="Directory to save/read checkpoints")
|
||||||
|
|
||||||
|
|
||||||
|
def addEthernet(system, options):
|
||||||
|
# create NIC
|
||||||
|
dev = IGbE_e1000()
|
||||||
|
system.attach_pci(dev)
|
||||||
|
system.ethernet = dev
|
||||||
|
|
||||||
|
# create distributed ethernet link
|
||||||
|
system.etherlink = DistEtherLink(speed = options.ethernet_linkspeed,
|
||||||
|
delay = options.ethernet_linkdelay,
|
||||||
|
dist_rank = options.dist_rank,
|
||||||
|
dist_size = options.dist_size,
|
||||||
|
server_name = options.dist_server_name,
|
||||||
|
server_port = options.dist_server_port,
|
||||||
|
sync_start = options.dist_sync_start,
|
||||||
|
sync_repeat = options.dist_sync_repeat)
|
||||||
|
system.etherlink.int0 = Parent.system.ethernet.interface
|
||||||
|
if options.etherdump:
|
||||||
|
system.etherdump = EtherDump(file=options.etherdump)
|
||||||
|
system.etherlink.dump = system.etherdump
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
description="Generic ARM big.LITTLE configuration with "\
|
||||||
|
"dist-gem5 support")
|
||||||
|
bL.addOptions(parser)
|
||||||
|
addOptions(parser)
|
||||||
|
options = parser.parse_args()
|
||||||
|
|
||||||
|
if options.is_switch:
|
||||||
|
root = Root(full_system = True,
|
||||||
|
system = sw.build_switch(options))
|
||||||
|
else:
|
||||||
|
root = bL.build(options)
|
||||||
|
addEthernet(root.system, options)
|
||||||
|
|
||||||
|
bL.instantiate(options, checkpoint_dir=options.checkpoint_dir)
|
||||||
|
bL.run(options.checkpoint_dir)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__m5_main__":
|
||||||
|
main()
|
|
@ -1,4 +1,4 @@
|
||||||
# Copyright (c) 2016 ARM Limited
|
# Copyright (c) 2016-2017 ARM Limited
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
#
|
#
|
||||||
# The license below extends only to copyright in the software and shall
|
# The license below extends only to copyright in the software and shall
|
||||||
|
@ -44,6 +44,7 @@ import argparse
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import m5
|
import m5
|
||||||
|
import m5.util
|
||||||
from m5.objects import *
|
from m5.objects import *
|
||||||
|
|
||||||
m5.util.addToPath("../../")
|
m5.util.addToPath("../../")
|
||||||
|
@ -52,6 +53,7 @@ from common import SysPaths
|
||||||
from common import CpuConfig
|
from common import CpuConfig
|
||||||
|
|
||||||
import devices
|
import devices
|
||||||
|
from devices import AtomicCluster, KvmCluster
|
||||||
|
|
||||||
|
|
||||||
default_dtb = 'armv8_gem5_v1_big_little_2_2.dtb'
|
default_dtb = 'armv8_gem5_v1_big_little_2_2.dtb'
|
||||||
|
@ -61,6 +63,21 @@ default_rcs = 'bootscript.rcS'
|
||||||
|
|
||||||
default_mem_size= "2GB"
|
default_mem_size= "2GB"
|
||||||
|
|
||||||
|
def _to_ticks(value):
|
||||||
|
"""Helper function to convert a latency from string format to Ticks"""
|
||||||
|
|
||||||
|
return m5.ticks.fromSeconds(m5.util.convert.anyToLatency(value))
|
||||||
|
|
||||||
|
def _using_pdes(root):
|
||||||
|
"""Determine if the simulator is using multiple parallel event queues"""
|
||||||
|
|
||||||
|
for obj in root.descendants():
|
||||||
|
if not m5.proxy.isproxy(obj.eventq_index) and \
|
||||||
|
obj.eventq_index != root.eventq_index:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
class BigCluster(devices.CpuCluster):
|
class BigCluster(devices.CpuCluster):
|
||||||
def __init__(self, system, num_cpus, cpu_clock,
|
def __init__(self, system, num_cpus, cpu_clock,
|
||||||
|
@ -107,11 +124,17 @@ def createSystem(caches, kernel, bootscript, disks=[]):
|
||||||
|
|
||||||
return sys
|
return sys
|
||||||
|
|
||||||
|
cpu_types = {
|
||||||
|
"atomic" : (AtomicCluster, AtomicCluster),
|
||||||
|
"timing" : (BigCluster, LittleCluster),
|
||||||
|
}
|
||||||
|
|
||||||
def main():
|
# Only add the KVM CPU if it has been compiled into gem5
|
||||||
parser = argparse.ArgumentParser(
|
if devices.have_kvm:
|
||||||
description="Generic ARM big.LITTLE configuration")
|
cpu_types["kvm"] = (KvmCluster, KvmCluster)
|
||||||
|
|
||||||
|
|
||||||
|
def addOptions(parser):
|
||||||
parser.add_argument("--restore-from", type=str, default=None,
|
parser.add_argument("--restore-from", type=str, default=None,
|
||||||
help="Restore from checkpoint")
|
help="Restore from checkpoint")
|
||||||
parser.add_argument("--dtb", type=str, default=default_dtb,
|
parser.add_argument("--dtb", type=str, default=default_dtb,
|
||||||
|
@ -122,8 +145,9 @@ def main():
|
||||||
help="Disks to instantiate")
|
help="Disks to instantiate")
|
||||||
parser.add_argument("--bootscript", type=str, default=default_rcs,
|
parser.add_argument("--bootscript", type=str, default=default_rcs,
|
||||||
help="Linux bootscript")
|
help="Linux bootscript")
|
||||||
parser.add_argument("--atomic", action="store_true", default=False,
|
parser.add_argument("--cpu-type", type=str, choices=cpu_types.keys(),
|
||||||
help="Use atomic CPUs")
|
default="timing",
|
||||||
|
help="CPU simulation mode. Default: %(default)s")
|
||||||
parser.add_argument("--kernel-init", type=str, default="/sbin/init",
|
parser.add_argument("--kernel-init", type=str, default="/sbin/init",
|
||||||
help="Override init")
|
help="Override init")
|
||||||
parser.add_argument("--big-cpus", type=int, default=1,
|
parser.add_argument("--big-cpus", type=int, default=1,
|
||||||
|
@ -138,11 +162,14 @@ def main():
|
||||||
help="Big CPU clock frequency")
|
help="Big CPU clock frequency")
|
||||||
parser.add_argument("--little-cpu-clock", type=str, default="1GHz",
|
parser.add_argument("--little-cpu-clock", type=str, default="1GHz",
|
||||||
help="Little CPU clock frequency")
|
help="Little CPU clock frequency")
|
||||||
|
parser.add_argument("--sim-quantum", type=str, default="1ms",
|
||||||
|
help="Simulation quantum for parallel simulation. " \
|
||||||
|
"Default: %(default)s")
|
||||||
|
return parser
|
||||||
|
|
||||||
|
def build(options):
|
||||||
m5.ticks.fixGlobalFrequency()
|
m5.ticks.fixGlobalFrequency()
|
||||||
|
|
||||||
options = parser.parse_args()
|
|
||||||
|
|
||||||
kernel_cmd = [
|
kernel_cmd = [
|
||||||
"earlyprintk=pl011,0x1c090000",
|
"earlyprintk=pl011,0x1c090000",
|
||||||
"console=ttyAMA0",
|
"console=ttyAMA0",
|
||||||
|
@ -167,35 +194,31 @@ def main():
|
||||||
root.system = system
|
root.system = system
|
||||||
system.boot_osflags = " ".join(kernel_cmd)
|
system.boot_osflags = " ".join(kernel_cmd)
|
||||||
|
|
||||||
AtomicCluster = devices.AtomicCluster
|
|
||||||
|
|
||||||
if options.big_cpus + options.little_cpus == 0:
|
if options.big_cpus + options.little_cpus == 0:
|
||||||
m5.util.panic("Empty CPU clusters")
|
m5.util.panic("Empty CPU clusters")
|
||||||
|
|
||||||
|
big_model, little_model = cpu_types[options.cpu_type]
|
||||||
|
|
||||||
|
all_cpus = []
|
||||||
# big cluster
|
# big cluster
|
||||||
if options.big_cpus > 0:
|
if options.big_cpus > 0:
|
||||||
if options.atomic:
|
system.bigCluster = big_model(system, options.big_cpus,
|
||||||
system.bigCluster = AtomicCluster(system, options.big_cpus,
|
options.big_cpu_clock)
|
||||||
options.big_cpu_clock)
|
system.mem_mode = system.bigCluster.memoryMode()
|
||||||
else:
|
all_cpus += system.bigCluster.cpus
|
||||||
system.bigCluster = BigCluster(system, options.big_cpus,
|
|
||||||
options.big_cpu_clock)
|
|
||||||
mem_mode = system.bigCluster.memoryMode()
|
|
||||||
# little cluster
|
# little cluster
|
||||||
if options.little_cpus > 0:
|
if options.little_cpus > 0:
|
||||||
if options.atomic:
|
system.littleCluster = little_model(system, options.little_cpus,
|
||||||
system.littleCluster = AtomicCluster(system, options.little_cpus,
|
options.little_cpu_clock)
|
||||||
options.little_cpu_clock)
|
system.mem_mode = system.littleCluster.memoryMode()
|
||||||
|
all_cpus += system.littleCluster.cpus
|
||||||
|
|
||||||
else:
|
# Figure out the memory mode
|
||||||
system.littleCluster = LittleCluster(system, options.little_cpus,
|
if options.big_cpus > 0 and options.little_cpus > 0 and \
|
||||||
options.little_cpu_clock)
|
system.littleCluster.memoryMode() != system.littleCluster.memoryMode():
|
||||||
mem_mode = system.littleCluster.memoryMode()
|
m5.util.panic("Memory mode missmatch among CPU clusters")
|
||||||
|
|
||||||
if options.big_cpus > 0 and options.little_cpus > 0:
|
|
||||||
if system.bigCluster.memoryMode() != system.littleCluster.memoryMode():
|
|
||||||
m5.util.panic("Memory mode missmatch among CPU clusters")
|
|
||||||
system.mem_mode = mem_mode
|
|
||||||
|
|
||||||
# create caches
|
# create caches
|
||||||
system.addCaches(options.caches, options.last_cache_level)
|
system.addCaches(options.caches, options.last_cache_level)
|
||||||
|
@ -205,23 +228,65 @@ def main():
|
||||||
if options.little_cpus > 0 and system.littleCluster.requireCaches():
|
if options.little_cpus > 0 and system.littleCluster.requireCaches():
|
||||||
m5.util.panic("Little CPU model requires caches")
|
m5.util.panic("Little CPU model requires caches")
|
||||||
|
|
||||||
|
# Create a KVM VM and do KVM-specific configuration
|
||||||
|
if issubclass(big_model, KvmCluster):
|
||||||
|
_build_kvm(system, all_cpus)
|
||||||
|
|
||||||
# Linux device tree
|
# Linux device tree
|
||||||
system.dtb_filename = SysPaths.binary(options.dtb)
|
system.dtb_filename = SysPaths.binary(options.dtb)
|
||||||
|
|
||||||
|
return root
|
||||||
|
|
||||||
|
def _build_kvm(system, cpus):
|
||||||
|
system.kvm_vm = KvmVM()
|
||||||
|
|
||||||
|
# Assign KVM CPUs to their own event queues / threads. This
|
||||||
|
# has to be done after creating caches and other child objects
|
||||||
|
# since these mustn't inherit the CPU event queue.
|
||||||
|
if len(cpus) > 1:
|
||||||
|
device_eq = 0
|
||||||
|
first_cpu_eq = 1
|
||||||
|
for idx, cpu in enumerate(cpus):
|
||||||
|
# Child objects usually inherit the parent's event
|
||||||
|
# queue. Override that and use the same event queue for
|
||||||
|
# all devices.
|
||||||
|
for obj in cpu.descendants():
|
||||||
|
obj.eventq_index = device_eq
|
||||||
|
cpu.eventq_index = first_cpu_eq + idx
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def instantiate(options, checkpoint_dir=None):
|
||||||
|
# Setup the simulation quantum if we are running in PDES-mode
|
||||||
|
# (e.g., when using KVM)
|
||||||
|
root = Root.getInstance()
|
||||||
|
if root and _using_pdes(root):
|
||||||
|
m5.util.inform("Running in PDES mode with a %s simulation quantum.",
|
||||||
|
options.sim_quantum)
|
||||||
|
root.sim_quantum = _to_ticks(options.sim_quantum)
|
||||||
|
|
||||||
# Get and load from the chkpt or simpoint checkpoint
|
# Get and load from the chkpt or simpoint checkpoint
|
||||||
if options.restore_from is not None:
|
if options.restore_from:
|
||||||
m5.instantiate(options.restore_from)
|
if checkpoint_dir and not os.path.isabs(options.restore_from):
|
||||||
|
cpt = os.path.join(checkpoint_dir, options.restore_from)
|
||||||
|
else:
|
||||||
|
cpt = options.restore_from
|
||||||
|
|
||||||
|
m5.util.inform("Restoring from checkpoint %s", cpt)
|
||||||
|
m5.instantiate(cpt)
|
||||||
else:
|
else:
|
||||||
m5.instantiate()
|
m5.instantiate()
|
||||||
|
|
||||||
|
|
||||||
|
def run(checkpoint_dir=m5.options.outdir):
|
||||||
# start simulation (and drop checkpoints when requested)
|
# start simulation (and drop checkpoints when requested)
|
||||||
while True:
|
while True:
|
||||||
event = m5.simulate()
|
event = m5.simulate()
|
||||||
exit_msg = event.getCause()
|
exit_msg = event.getCause()
|
||||||
if exit_msg == "checkpoint":
|
if exit_msg == "checkpoint":
|
||||||
print "Dropping checkpoint at tick %d" % m5.curTick()
|
print "Dropping checkpoint at tick %d" % m5.curTick()
|
||||||
cpt_dir = os.path.join(m5.options.outdir, "cpt.%d" % m5.curTick())
|
cpt_dir = os.path.join(checkpoint_dir, "cpt.%d" % m5.curTick())
|
||||||
m5.checkpoint(os.path.join(cpt_dir))
|
m5.checkpoint(cpt_dir)
|
||||||
print "Checkpoint done."
|
print "Checkpoint done."
|
||||||
else:
|
else:
|
||||||
print exit_msg, " @ ", m5.curTick()
|
print exit_msg, " @ ", m5.curTick()
|
||||||
|
@ -230,5 +295,15 @@ def main():
|
||||||
sys.exit(event.getCode())
|
sys.exit(event.getCode())
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
description="Generic ARM big.LITTLE configuration")
|
||||||
|
addOptions(parser)
|
||||||
|
options = parser.parse_args()
|
||||||
|
root = build(options)
|
||||||
|
instantiate(options)
|
||||||
|
run()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__m5_main__":
|
if __name__ == "__m5_main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -143,7 +143,7 @@ def build_test_system(np):
|
||||||
for i in xrange(np)]
|
for i in xrange(np)]
|
||||||
|
|
||||||
if is_kvm_cpu(TestCPUClass) or is_kvm_cpu(FutureClass):
|
if is_kvm_cpu(TestCPUClass) or is_kvm_cpu(FutureClass):
|
||||||
test_sys.vm = KvmVM()
|
test_sys.kvm_vm = KvmVM()
|
||||||
|
|
||||||
if options.ruby:
|
if options.ruby:
|
||||||
# Check for timing mode because ruby does not support atomic accesses
|
# Check for timing mode because ruby does not support atomic accesses
|
||||||
|
@ -280,7 +280,7 @@ def build_drive_system(np):
|
||||||
drive_sys.kernel = binary(options.kernel)
|
drive_sys.kernel = binary(options.kernel)
|
||||||
|
|
||||||
if is_kvm_cpu(DriveCPUClass):
|
if is_kvm_cpu(DriveCPUClass):
|
||||||
drive_sys.vm = KvmVM()
|
drive_sys.kvm_vm = KvmVM()
|
||||||
|
|
||||||
drive_sys.iobridge = Bridge(delay='50ns',
|
drive_sys.iobridge = Bridge(delay='50ns',
|
||||||
ranges = drive_sys.mem_ranges)
|
ranges = drive_sys.mem_ranges)
|
||||||
|
|
|
@ -13,8 +13,8 @@ from common import HMC
|
||||||
|
|
||||||
parser = optparse.OptionParser()
|
parser = optparse.OptionParser()
|
||||||
|
|
||||||
# Use a HMC_2500_x32 by default
|
# Use a HMC_2500_1x32 (1 channel, 32-bits wide) by default
|
||||||
parser.add_option("--mem-type", type = "choice", default = "HMC_2500_x32",
|
parser.add_option("--mem-type", type = "choice", default = "HMC_2500_1x32",
|
||||||
choices = MemConfig.mem_names(),
|
choices = MemConfig.mem_names(),
|
||||||
help = "type of memory to use")
|
help = "type of memory to use")
|
||||||
|
|
||||||
|
|
|
@ -216,7 +216,7 @@ cfg_file.close()
|
||||||
proto_tester = TrafficGen(config_file = cfg_file_name)
|
proto_tester = TrafficGen(config_file = cfg_file_name)
|
||||||
|
|
||||||
# Set up the system along with a DRAM controller
|
# Set up the system along with a DRAM controller
|
||||||
system = System(physmem = DDR3_1600_x64())
|
system = System(physmem = DDR3_1600_8x8())
|
||||||
|
|
||||||
system.voltage_domain = VoltageDomain(voltage = '1V')
|
system.voltage_domain = VoltageDomain(voltage = '1V')
|
||||||
|
|
||||||
|
|
|
@ -91,7 +91,7 @@ def get_processes(options):
|
||||||
|
|
||||||
idx = 0
|
idx = 0
|
||||||
for wrkld in workloads:
|
for wrkld in workloads:
|
||||||
process = LiveProcess()
|
process = Process()
|
||||||
process.executable = wrkld
|
process.executable = wrkld
|
||||||
process.cwd = os.getcwd()
|
process.cwd = os.getcwd()
|
||||||
|
|
||||||
|
@ -154,7 +154,7 @@ if options.bench:
|
||||||
else:
|
else:
|
||||||
exec("workload = %s(buildEnv['TARGET_ISA', 'linux', '%s')" % (
|
exec("workload = %s(buildEnv['TARGET_ISA', 'linux', '%s')" % (
|
||||||
app, options.spec_input))
|
app, options.spec_input))
|
||||||
multiprocesses.append(workload.makeLiveProcess())
|
multiprocesses.append(workload.makeProcess())
|
||||||
except:
|
except:
|
||||||
print >>sys.stderr, "Unable to find workload for %s: %s" % (
|
print >>sys.stderr, "Unable to find workload for %s: %s" % (
|
||||||
buildEnv['TARGET_ISA'], app)
|
buildEnv['TARGET_ISA'], app)
|
||||||
|
@ -209,7 +209,7 @@ for cpu in system.cpu:
|
||||||
|
|
||||||
if is_kvm_cpu(CPUClass) or is_kvm_cpu(FutureClass):
|
if is_kvm_cpu(CPUClass) or is_kvm_cpu(FutureClass):
|
||||||
if buildEnv['TARGET_ISA'] == 'x86':
|
if buildEnv['TARGET_ISA'] == 'x86':
|
||||||
system.vm = KvmVM()
|
system.kvm_vm = KvmVM()
|
||||||
for process in multiprocesses:
|
for process in multiprocesses:
|
||||||
process.useArchPT = True
|
process.useArchPT = True
|
||||||
process.kvmInSE = True
|
process.kvmInSE = True
|
||||||
|
|
|
@ -88,48 +88,15 @@ class L1DCache(L1Cache):
|
||||||
|
|
||||||
# Set the default size
|
# Set the default size
|
||||||
size = '64kB'
|
size = '64kB'
|
||||||
assoc = 8
|
|
||||||
|
|
||||||
SimpleOpts.add_option('--l1d_size',
|
SimpleOpts.add_option('--l1d_size',
|
||||||
help="L1 data cache size. Default: %s" % size)
|
help="L1 data cache size. Default: %s" % size)
|
||||||
SimpleOpts.add_option('--l1d_assoc',
|
|
||||||
help="L1 data cache associativity. Default: %s" % assoc)
|
|
||||||
SimpleOpts.add_option('--replacement_policy',
|
|
||||||
help="L1 cache replacement policy. [NMRU,LFU,LIFO,LRU,"
|
|
||||||
"Random,FIFO]")
|
|
||||||
|
|
||||||
def __init__(self, opts=None):
|
def __init__(self, opts=None):
|
||||||
super(L1DCache, self).__init__(opts)
|
super(L1DCache, self).__init__(opts)
|
||||||
if not opts:
|
if not opts or not opts.l1d_size:
|
||||||
return
|
return
|
||||||
|
self.size = opts.l1d_size
|
||||||
if opts.l1d_size:
|
|
||||||
self.size = opts.l1d_size
|
|
||||||
|
|
||||||
if opts.l1d_assoc:
|
|
||||||
self.size = opts.l1d_assoc
|
|
||||||
|
|
||||||
if opts.replacement_policy == "NMRU":
|
|
||||||
from m5.objects import NMRU
|
|
||||||
self.tags = NMRU()
|
|
||||||
elif opts.replacement_policy == "Random":
|
|
||||||
from m5.objects import RandomRepl
|
|
||||||
self.tags = RandomRepl()
|
|
||||||
elif opts.replacement_policy == "LRU":
|
|
||||||
from m5.objects import LRU
|
|
||||||
self.tags = LRU()
|
|
||||||
elif opts.replacement_policy == "LFU":
|
|
||||||
from m5.objects import LFU
|
|
||||||
self.tags = LFU()
|
|
||||||
elif opts.replacement_policy == "LIFO":
|
|
||||||
from m5.objects import LIFO
|
|
||||||
self.tags = LIFO()
|
|
||||||
elif opts.replacement_policy == "FIFO":
|
|
||||||
from m5.objects import FIFO
|
|
||||||
self.tags = FIFO()
|
|
||||||
elif opts.replacement_policy:
|
|
||||||
fatal("Unsupported replacement policy: %s" %
|
|
||||||
opts.replacement_policy)
|
|
||||||
|
|
||||||
def connectCPU(self, cpu):
|
def connectCPU(self, cpu):
|
||||||
"""Connect this cache's port to a CPU dcache port"""
|
"""Connect this cache's port to a CPU dcache port"""
|
||||||
|
|
|
@ -75,7 +75,7 @@ if m5.defines.buildEnv['TARGET_ISA'] == "x86":
|
||||||
system.cpu.interrupts[0].int_slave = system.membus.master
|
system.cpu.interrupts[0].int_slave = system.membus.master
|
||||||
|
|
||||||
# Create a DDR3 memory controller and connect it to the membus
|
# Create a DDR3 memory controller and connect it to the membus
|
||||||
system.mem_ctrl = DDR3_1600_x64()
|
system.mem_ctrl = DDR3_1600_8x8()
|
||||||
system.mem_ctrl.range = system.mem_ranges[0]
|
system.mem_ctrl.range = system.mem_ranges[0]
|
||||||
system.mem_ctrl.port = system.membus.master
|
system.mem_ctrl.port = system.membus.master
|
||||||
|
|
||||||
|
@ -89,7 +89,7 @@ isa = str(m5.defines.buildEnv['TARGET_ISA']).lower()
|
||||||
binary = 'tests/test-progs/hello/bin/' + isa + '/linux/hello'
|
binary = 'tests/test-progs/hello/bin/' + isa + '/linux/hello'
|
||||||
|
|
||||||
# Create a process for a simple "Hello World" application
|
# Create a process for a simple "Hello World" application
|
||||||
process = LiveProcess()
|
process = Process()
|
||||||
# Set the command
|
# Set the command
|
||||||
# cmd is a list which begins with the executable (like argv)
|
# cmd is a list which begins with the executable (like argv)
|
||||||
process.cmd = [binary]
|
process.cmd = [binary]
|
||||||
|
|
|
@ -128,12 +128,12 @@ if m5.defines.buildEnv['TARGET_ISA'] == "x86":
|
||||||
system.system_port = system.membus.slave
|
system.system_port = system.membus.slave
|
||||||
|
|
||||||
# Create a DDR3 memory controller
|
# Create a DDR3 memory controller
|
||||||
system.mem_ctrl = DDR3_1600_x64()
|
system.mem_ctrl = DDR3_1600_8x8()
|
||||||
system.mem_ctrl.range = system.mem_ranges[0]
|
system.mem_ctrl.range = system.mem_ranges[0]
|
||||||
system.mem_ctrl.port = system.membus.master
|
system.mem_ctrl.port = system.membus.master
|
||||||
|
|
||||||
# Create a process for a simple "Hello World" application
|
# Create a process for a simple "Hello World" application
|
||||||
process = LiveProcess()
|
process = Process()
|
||||||
# Set the command
|
# Set the command
|
||||||
# cmd is a list which begins with the executable (like argv)
|
# cmd is a list which begins with the executable (like argv)
|
||||||
process.cmd = [binary]
|
process.cmd = [binary]
|
||||||
|
|
|
@ -76,56 +76,56 @@ if args:
|
||||||
# --------------------
|
# --------------------
|
||||||
# Define Splash2 Benchmarks
|
# Define Splash2 Benchmarks
|
||||||
# ====================
|
# ====================
|
||||||
class Cholesky(LiveProcess):
|
class Cholesky(Process):
|
||||||
executable = options.rootdir + '/kernels/cholesky/CHOLESKY'
|
executable = options.rootdir + '/kernels/cholesky/CHOLESKY'
|
||||||
cmd = 'CHOLESKY -p' + str(options.numcpus) + ' '\
|
cmd = 'CHOLESKY -p' + str(options.numcpus) + ' '\
|
||||||
+ options.rootdir + '/kernels/cholesky/inputs/tk23.O'
|
+ options.rootdir + '/kernels/cholesky/inputs/tk23.O'
|
||||||
|
|
||||||
class FFT(LiveProcess):
|
class FFT(Process):
|
||||||
executable = options.rootdir + 'kernels/fft/FFT'
|
executable = options.rootdir + 'kernels/fft/FFT'
|
||||||
cmd = 'FFT -p' + str(options.numcpus) + ' -m18'
|
cmd = 'FFT -p' + str(options.numcpus) + ' -m18'
|
||||||
|
|
||||||
class LU_contig(LiveProcess):
|
class LU_contig(Process):
|
||||||
executable = options.rootdir + 'kernels/lu/contiguous_blocks/LU'
|
executable = options.rootdir + 'kernels/lu/contiguous_blocks/LU'
|
||||||
cmd = 'LU -p' + str(options.numcpus)
|
cmd = 'LU -p' + str(options.numcpus)
|
||||||
|
|
||||||
class LU_noncontig(LiveProcess):
|
class LU_noncontig(Process):
|
||||||
executable = options.rootdir + 'kernels/lu/non_contiguous_blocks/LU'
|
executable = options.rootdir + 'kernels/lu/non_contiguous_blocks/LU'
|
||||||
cmd = 'LU -p' + str(options.numcpus)
|
cmd = 'LU -p' + str(options.numcpus)
|
||||||
|
|
||||||
class Radix(LiveProcess):
|
class Radix(Process):
|
||||||
executable = options.rootdir + 'kernels/radix/RADIX'
|
executable = options.rootdir + 'kernels/radix/RADIX'
|
||||||
cmd = 'RADIX -n524288 -p' + str(options.numcpus)
|
cmd = 'RADIX -n524288 -p' + str(options.numcpus)
|
||||||
|
|
||||||
class Barnes(LiveProcess):
|
class Barnes(Process):
|
||||||
executable = options.rootdir + 'apps/barnes/BARNES'
|
executable = options.rootdir + 'apps/barnes/BARNES'
|
||||||
cmd = 'BARNES'
|
cmd = 'BARNES'
|
||||||
input = options.rootdir + 'apps/barnes/input.p' + str(options.numcpus)
|
input = options.rootdir + 'apps/barnes/input.p' + str(options.numcpus)
|
||||||
|
|
||||||
class FMM(LiveProcess):
|
class FMM(Process):
|
||||||
executable = options.rootdir + 'apps/fmm/FMM'
|
executable = options.rootdir + 'apps/fmm/FMM'
|
||||||
cmd = 'FMM'
|
cmd = 'FMM'
|
||||||
input = options.rootdir + 'apps/fmm/inputs/input.2048.p' + str(options.numcpus)
|
input = options.rootdir + 'apps/fmm/inputs/input.2048.p' + str(options.numcpus)
|
||||||
|
|
||||||
class Ocean_contig(LiveProcess):
|
class Ocean_contig(Process):
|
||||||
executable = options.rootdir + 'apps/ocean/contiguous_partitions/OCEAN'
|
executable = options.rootdir + 'apps/ocean/contiguous_partitions/OCEAN'
|
||||||
cmd = 'OCEAN -p' + str(options.numcpus)
|
cmd = 'OCEAN -p' + str(options.numcpus)
|
||||||
|
|
||||||
class Ocean_noncontig(LiveProcess):
|
class Ocean_noncontig(Process):
|
||||||
executable = options.rootdir + 'apps/ocean/non_contiguous_partitions/OCEAN'
|
executable = options.rootdir + 'apps/ocean/non_contiguous_partitions/OCEAN'
|
||||||
cmd = 'OCEAN -p' + str(options.numcpus)
|
cmd = 'OCEAN -p' + str(options.numcpus)
|
||||||
|
|
||||||
class Raytrace(LiveProcess):
|
class Raytrace(Process):
|
||||||
executable = options.rootdir + 'apps/raytrace/RAYTRACE'
|
executable = options.rootdir + 'apps/raytrace/RAYTRACE'
|
||||||
cmd = 'RAYTRACE -p' + str(options.numcpus) + ' ' \
|
cmd = 'RAYTRACE -p' + str(options.numcpus) + ' ' \
|
||||||
+ options.rootdir + 'apps/raytrace/inputs/teapot.env'
|
+ options.rootdir + 'apps/raytrace/inputs/teapot.env'
|
||||||
|
|
||||||
class Water_nsquared(LiveProcess):
|
class Water_nsquared(Process):
|
||||||
executable = options.rootdir + 'apps/water-nsquared/WATER-NSQUARED'
|
executable = options.rootdir + 'apps/water-nsquared/WATER-NSQUARED'
|
||||||
cmd = 'WATER-NSQUARED'
|
cmd = 'WATER-NSQUARED'
|
||||||
input = options.rootdir + 'apps/water-nsquared/input.p' + str(options.numcpus)
|
input = options.rootdir + 'apps/water-nsquared/input.p' + str(options.numcpus)
|
||||||
|
|
||||||
class Water_spatial(LiveProcess):
|
class Water_spatial(Process):
|
||||||
executable = options.rootdir + 'apps/water-spatial/WATER-SPATIAL'
|
executable = options.rootdir + 'apps/water-spatial/WATER-SPATIAL'
|
||||||
cmd = 'WATER-SPATIAL'
|
cmd = 'WATER-SPATIAL'
|
||||||
input = options.rootdir + 'apps/water-spatial/input.p' + str(options.numcpus)
|
input = options.rootdir + 'apps/water-spatial/input.p' + str(options.numcpus)
|
||||||
|
|
|
@ -77,39 +77,39 @@ if not options.numcpus:
|
||||||
# --------------------
|
# --------------------
|
||||||
# Define Splash2 Benchmarks
|
# Define Splash2 Benchmarks
|
||||||
# ====================
|
# ====================
|
||||||
class Cholesky(LiveProcess):
|
class Cholesky(Process):
|
||||||
cwd = options.rootdir + '/kernels/cholesky'
|
cwd = options.rootdir + '/kernels/cholesky'
|
||||||
executable = options.rootdir + '/kernels/cholesky/CHOLESKY'
|
executable = options.rootdir + '/kernels/cholesky/CHOLESKY'
|
||||||
cmd = ['CHOLESKY', '-p' + str(options.numcpus),
|
cmd = ['CHOLESKY', '-p' + str(options.numcpus),
|
||||||
options.rootdir + '/kernels/cholesky/inputs/tk23.O']
|
options.rootdir + '/kernels/cholesky/inputs/tk23.O']
|
||||||
|
|
||||||
class FFT(LiveProcess):
|
class FFT(Process):
|
||||||
cwd = options.rootdir + '/kernels/fft'
|
cwd = options.rootdir + '/kernels/fft'
|
||||||
executable = options.rootdir + '/kernels/fft/FFT'
|
executable = options.rootdir + '/kernels/fft/FFT'
|
||||||
cmd = ['FFT', '-p', str(options.numcpus), '-m18']
|
cmd = ['FFT', '-p', str(options.numcpus), '-m18']
|
||||||
|
|
||||||
class LU_contig(LiveProcess):
|
class LU_contig(Process):
|
||||||
executable = options.rootdir + '/kernels/lu/contiguous_blocks/LU'
|
executable = options.rootdir + '/kernels/lu/contiguous_blocks/LU'
|
||||||
cmd = ['LU', '-p', str(options.numcpus)]
|
cmd = ['LU', '-p', str(options.numcpus)]
|
||||||
cwd = options.rootdir + '/kernels/lu/contiguous_blocks'
|
cwd = options.rootdir + '/kernels/lu/contiguous_blocks'
|
||||||
|
|
||||||
class LU_noncontig(LiveProcess):
|
class LU_noncontig(Process):
|
||||||
executable = options.rootdir + '/kernels/lu/non_contiguous_blocks/LU'
|
executable = options.rootdir + '/kernels/lu/non_contiguous_blocks/LU'
|
||||||
cmd = ['LU', '-p', str(options.numcpus)]
|
cmd = ['LU', '-p', str(options.numcpus)]
|
||||||
cwd = options.rootdir + '/kernels/lu/non_contiguous_blocks'
|
cwd = options.rootdir + '/kernels/lu/non_contiguous_blocks'
|
||||||
|
|
||||||
class Radix(LiveProcess):
|
class Radix(Process):
|
||||||
executable = options.rootdir + '/kernels/radix/RADIX'
|
executable = options.rootdir + '/kernels/radix/RADIX'
|
||||||
cmd = ['RADIX', '-n524288', '-p', str(options.numcpus)]
|
cmd = ['RADIX', '-n524288', '-p', str(options.numcpus)]
|
||||||
cwd = options.rootdir + '/kernels/radix'
|
cwd = options.rootdir + '/kernels/radix'
|
||||||
|
|
||||||
class Barnes(LiveProcess):
|
class Barnes(Process):
|
||||||
executable = options.rootdir + '/apps/barnes/BARNES'
|
executable = options.rootdir + '/apps/barnes/BARNES'
|
||||||
cmd = ['BARNES']
|
cmd = ['BARNES']
|
||||||
input = options.rootdir + '/apps/barnes/input.p' + str(options.numcpus)
|
input = options.rootdir + '/apps/barnes/input.p' + str(options.numcpus)
|
||||||
cwd = options.rootdir + '/apps/barnes'
|
cwd = options.rootdir + '/apps/barnes'
|
||||||
|
|
||||||
class FMM(LiveProcess):
|
class FMM(Process):
|
||||||
executable = options.rootdir + '/apps/fmm/FMM'
|
executable = options.rootdir + '/apps/fmm/FMM'
|
||||||
cmd = ['FMM']
|
cmd = ['FMM']
|
||||||
if str(options.numcpus) == '1':
|
if str(options.numcpus) == '1':
|
||||||
|
@ -118,23 +118,23 @@ class FMM(LiveProcess):
|
||||||
input = options.rootdir + '/apps/fmm/inputs/input.2048.p' + str(options.numcpus)
|
input = options.rootdir + '/apps/fmm/inputs/input.2048.p' + str(options.numcpus)
|
||||||
cwd = options.rootdir + '/apps/fmm'
|
cwd = options.rootdir + '/apps/fmm'
|
||||||
|
|
||||||
class Ocean_contig(LiveProcess):
|
class Ocean_contig(Process):
|
||||||
executable = options.rootdir + '/apps/ocean/contiguous_partitions/OCEAN'
|
executable = options.rootdir + '/apps/ocean/contiguous_partitions/OCEAN'
|
||||||
cmd = ['OCEAN', '-p', str(options.numcpus)]
|
cmd = ['OCEAN', '-p', str(options.numcpus)]
|
||||||
cwd = options.rootdir + '/apps/ocean/contiguous_partitions'
|
cwd = options.rootdir + '/apps/ocean/contiguous_partitions'
|
||||||
|
|
||||||
class Ocean_noncontig(LiveProcess):
|
class Ocean_noncontig(Process):
|
||||||
executable = options.rootdir + '/apps/ocean/non_contiguous_partitions/OCEAN'
|
executable = options.rootdir + '/apps/ocean/non_contiguous_partitions/OCEAN'
|
||||||
cmd = ['OCEAN', '-p', str(options.numcpus)]
|
cmd = ['OCEAN', '-p', str(options.numcpus)]
|
||||||
cwd = options.rootdir + '/apps/ocean/non_contiguous_partitions'
|
cwd = options.rootdir + '/apps/ocean/non_contiguous_partitions'
|
||||||
|
|
||||||
class Raytrace(LiveProcess):
|
class Raytrace(Process):
|
||||||
executable = options.rootdir + '/apps/raytrace/RAYTRACE'
|
executable = options.rootdir + '/apps/raytrace/RAYTRACE'
|
||||||
cmd = ['RAYTRACE', '-p' + str(options.numcpus),
|
cmd = ['RAYTRACE', '-p' + str(options.numcpus),
|
||||||
options.rootdir + '/apps/raytrace/inputs/teapot.env']
|
options.rootdir + '/apps/raytrace/inputs/teapot.env']
|
||||||
cwd = options.rootdir + '/apps/raytrace'
|
cwd = options.rootdir + '/apps/raytrace'
|
||||||
|
|
||||||
class Water_nsquared(LiveProcess):
|
class Water_nsquared(Process):
|
||||||
executable = options.rootdir + '/apps/water-nsquared/WATER-NSQUARED'
|
executable = options.rootdir + '/apps/water-nsquared/WATER-NSQUARED'
|
||||||
cmd = ['WATER-NSQUARED']
|
cmd = ['WATER-NSQUARED']
|
||||||
if options.numcpus==1:
|
if options.numcpus==1:
|
||||||
|
@ -143,7 +143,7 @@ class Water_nsquared(LiveProcess):
|
||||||
input = options.rootdir + '/apps/water-nsquared/input.p' + str(options.numcpus)
|
input = options.rootdir + '/apps/water-nsquared/input.p' + str(options.numcpus)
|
||||||
cwd = options.rootdir + '/apps/water-nsquared'
|
cwd = options.rootdir + '/apps/water-nsquared'
|
||||||
|
|
||||||
class Water_spatial(LiveProcess):
|
class Water_spatial(Process):
|
||||||
executable = options.rootdir + '/apps/water-spatial/WATER-SPATIAL'
|
executable = options.rootdir + '/apps/water-spatial/WATER-SPATIAL'
|
||||||
cmd = ['WATER-SPATIAL']
|
cmd = ['WATER-SPATIAL']
|
||||||
if options.numcpus==1:
|
if options.numcpus==1:
|
||||||
|
|
191
ext/git-commit-msg
Executable file
191
ext/git-commit-msg
Executable file
|
@ -0,0 +1,191 @@
|
||||||
|
#!/bin/sh
|
||||||
|
# From Gerrit Code Review 2.13.5-2617-gba50ae91fd
|
||||||
|
#
|
||||||
|
# Part of Gerrit Code Review (https://www.gerritcodereview.com/)
|
||||||
|
#
|
||||||
|
# Copyright (C) 2009 The Android Open Source Project
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
unset GREP_OPTIONS
|
||||||
|
|
||||||
|
CHANGE_ID_AFTER="Bug|Depends-On|Issue|Test|Feature|Fixes|Fixed"
|
||||||
|
MSG="$1"
|
||||||
|
|
||||||
|
# Check for, and add if missing, a unique Change-Id
|
||||||
|
#
|
||||||
|
add_ChangeId() {
|
||||||
|
clean_message=`sed -e '
|
||||||
|
/^diff --git .*/{
|
||||||
|
s///
|
||||||
|
q
|
||||||
|
}
|
||||||
|
/^Signed-off-by:/d
|
||||||
|
/^#/d
|
||||||
|
' "$MSG" | git stripspace`
|
||||||
|
if test -z "$clean_message"
|
||||||
|
then
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Do not add Change-Id to temp commits
|
||||||
|
if echo "$clean_message" | head -1 | grep -q '^\(fixup\|squash\)!'
|
||||||
|
then
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "false" = "`git config --bool --get gerrit.createChangeId`"
|
||||||
|
then
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Does Change-Id: already exist? if so, exit (no change).
|
||||||
|
if grep -i '^Change-Id:' "$MSG" >/dev/null
|
||||||
|
then
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
|
id=`_gen_ChangeId`
|
||||||
|
T="$MSG.tmp.$$"
|
||||||
|
AWK=awk
|
||||||
|
if [ -x /usr/xpg4/bin/awk ]; then
|
||||||
|
# Solaris AWK is just too broken
|
||||||
|
AWK=/usr/xpg4/bin/awk
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Get core.commentChar from git config or use default symbol
|
||||||
|
commentChar=`git config --get core.commentChar`
|
||||||
|
commentChar=${commentChar:-#}
|
||||||
|
|
||||||
|
# How this works:
|
||||||
|
# - parse the commit message as (textLine+ blankLine*)*
|
||||||
|
# - assume textLine+ to be a footer until proven otherwise
|
||||||
|
# - exception: the first block is not footer (as it is the title)
|
||||||
|
# - read textLine+ into a variable
|
||||||
|
# - then count blankLines
|
||||||
|
# - once the next textLine appears, print textLine+ blankLine* as these
|
||||||
|
# aren't footer
|
||||||
|
# - in END, the last textLine+ block is available for footer parsing
|
||||||
|
$AWK '
|
||||||
|
BEGIN {
|
||||||
|
# while we start with the assumption that textLine+
|
||||||
|
# is a footer, the first block is not.
|
||||||
|
isFooter = 0
|
||||||
|
footerComment = 0
|
||||||
|
blankLines = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
# Skip lines starting with commentChar without any spaces before it.
|
||||||
|
/^'"$commentChar"'/ { next }
|
||||||
|
|
||||||
|
# Skip the line starting with the diff command and everything after it,
|
||||||
|
# up to the end of the file, assuming it is only patch data.
|
||||||
|
# If more than one line before the diff was empty, strip all but one.
|
||||||
|
/^diff --git / {
|
||||||
|
blankLines = 0
|
||||||
|
while (getline) { }
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
|
# Count blank lines outside footer comments
|
||||||
|
/^$/ && (footerComment == 0) {
|
||||||
|
blankLines++
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
|
# Catch footer comment
|
||||||
|
/^\[[a-zA-Z0-9-]+:/ && (isFooter == 1) {
|
||||||
|
footerComment = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
/]$/ && (footerComment == 1) {
|
||||||
|
footerComment = 2
|
||||||
|
}
|
||||||
|
|
||||||
|
# We have a non-blank line after blank lines. Handle this.
|
||||||
|
(blankLines > 0) {
|
||||||
|
print lines
|
||||||
|
for (i = 0; i < blankLines; i++) {
|
||||||
|
print ""
|
||||||
|
}
|
||||||
|
|
||||||
|
lines = ""
|
||||||
|
blankLines = 0
|
||||||
|
isFooter = 1
|
||||||
|
footerComment = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
# Detect that the current block is not the footer
|
||||||
|
(footerComment == 0) && (!/^\[?[a-zA-Z0-9-]+:/ || /^[a-zA-Z0-9-]+:\/\//) {
|
||||||
|
isFooter = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
# We need this information about the current last comment line
|
||||||
|
if (footerComment == 2) {
|
||||||
|
footerComment = 0
|
||||||
|
}
|
||||||
|
if (lines != "") {
|
||||||
|
lines = lines "\n";
|
||||||
|
}
|
||||||
|
lines = lines $0
|
||||||
|
}
|
||||||
|
|
||||||
|
# Footer handling:
|
||||||
|
# If the last block is considered a footer, splice in the Change-Id at the
|
||||||
|
# right place.
|
||||||
|
# Look for the right place to inject Change-Id by considering
|
||||||
|
# CHANGE_ID_AFTER. Keys listed in it (case insensitive) come first,
|
||||||
|
# then Change-Id, then everything else (eg. Signed-off-by:).
|
||||||
|
#
|
||||||
|
# Otherwise just print the last block, a new line and the Change-Id as a
|
||||||
|
# block of its own.
|
||||||
|
END {
|
||||||
|
unprinted = 1
|
||||||
|
if (isFooter == 0) {
|
||||||
|
print lines "\n"
|
||||||
|
lines = ""
|
||||||
|
}
|
||||||
|
changeIdAfter = "^(" tolower("'"$CHANGE_ID_AFTER"'") "):"
|
||||||
|
numlines = split(lines, footer, "\n")
|
||||||
|
for (line = 1; line <= numlines; line++) {
|
||||||
|
if (unprinted && match(tolower(footer[line]), changeIdAfter) != 1) {
|
||||||
|
unprinted = 0
|
||||||
|
print "Change-Id: I'"$id"'"
|
||||||
|
}
|
||||||
|
print footer[line]
|
||||||
|
}
|
||||||
|
if (unprinted) {
|
||||||
|
print "Change-Id: I'"$id"'"
|
||||||
|
}
|
||||||
|
}' "$MSG" > "$T" && mv "$T" "$MSG" || rm -f "$T"
|
||||||
|
}
|
||||||
|
_gen_ChangeIdInput() {
|
||||||
|
echo "tree `git write-tree`"
|
||||||
|
if parent=`git rev-parse "HEAD^0" 2>/dev/null`
|
||||||
|
then
|
||||||
|
echo "parent $parent"
|
||||||
|
fi
|
||||||
|
echo "author `git var GIT_AUTHOR_IDENT`"
|
||||||
|
echo "committer `git var GIT_COMMITTER_IDENT`"
|
||||||
|
echo
|
||||||
|
printf '%s' "$clean_message"
|
||||||
|
}
|
||||||
|
_gen_ChangeId() {
|
||||||
|
_gen_ChangeIdInput |
|
||||||
|
git hash-object -t commit --stdin
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
add_ChangeId
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
# Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
# Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
# Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
# Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# calc.py
|
# calc.py
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python2
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# calc.py
|
# calc.py
|
||||||
|
|
|
@ -1,349 +0,0 @@
|
||||||
Date: Oct 19, 1994
|
|
||||||
|
|
||||||
This is the directory for the second release of the Stanford Parallel
|
|
||||||
Applications for Shared-Memory (SPLASH-2) programs. For further
|
|
||||||
information contact splash@mojave.stanford.edu.
|
|
||||||
|
|
||||||
PLEASE NOTE: Due to our limited resources, we will be unable to spend
|
|
||||||
much time answering questions about the applications.
|
|
||||||
|
|
||||||
splash.tar contains the tarred version of all the files. Grabbing this
|
|
||||||
file will get you everything you need. We also keep the files
|
|
||||||
individually untarred for partial retrieval. The splash.tar file is not
|
|
||||||
compressed, but the large files in it are. We attempted to compress the
|
|
||||||
splash.tar file to reduce the file size further, but this resulted in
|
|
||||||
a negative compression ratio.
|
|
||||||
|
|
||||||
|
|
||||||
DIFFERENCES BETWEEN SPLASH AND SPLASH-2:
|
|
||||||
----------------------------------------
|
|
||||||
|
|
||||||
The SPLASH-2 suite contains two types of codes: full applications and
|
|
||||||
kernels. Each of the codes utilizes the Argonne National Laboratories
|
|
||||||
(ANL) parmacs macros for parallel constructs. Unlike the codes in the
|
|
||||||
original SPLASH release, each of the codes assumes the use of a
|
|
||||||
"lightweight threads" model (which we hereafter refer to as the "threads"
|
|
||||||
model) in which child processes share the same virtual address space as
|
|
||||||
their parent process. In order for the codes to function correctly,
|
|
||||||
the CREATE macro should call the proper Unix system routine (e.g. "sproc"
|
|
||||||
in the Silicon Graphics IRIX operating system) instead of the "fork"
|
|
||||||
routine that was used for SPLASH. The difference is that processes
|
|
||||||
created with the Unix fork command receive their own private copies of
|
|
||||||
all global variables. In the threads model, child processes share the
|
|
||||||
same virtual address space, and hence all global data. Some of the
|
|
||||||
codes function correctly when the Unix "fork" command is used for child
|
|
||||||
process creation as well. Comments in the code header denote those
|
|
||||||
applications which function correctly with "fork."
|
|
||||||
|
|
||||||
|
|
||||||
MACROS:
|
|
||||||
-------
|
|
||||||
|
|
||||||
Macros for the previous release of the SPLASH application suite can be
|
|
||||||
obtained via anonymous ftp to www-flash.stanford.edu. The macros are
|
|
||||||
contained in the pub/old_splash/splash/macros subdirectory. HOWEVER,
|
|
||||||
THE MACRO FILES MUST BE MODIFIED IN ORDER TO BE USED WITH SPLASH-2 CODES.
|
|
||||||
The CREATE macros must be changed so that they call the proper process
|
|
||||||
creation routine (See DIFFERENCES section above) instead of "fork."
|
|
||||||
|
|
||||||
In this macros subdirectory, macros and sample makefiles are provided
|
|
||||||
for three machines:
|
|
||||||
|
|
||||||
Encore Multimax (CMU Mach 2.5: C and Fortran)
|
|
||||||
SGI 4D/240 (IRIX System V Release 3.3: C only)
|
|
||||||
Alliant FX/8 (Alliant Rev. 5.0: C and Fortran)
|
|
||||||
|
|
||||||
These macros work for us with the above operating systems. Unfortunately,
|
|
||||||
our limited resources prevent us from supporting them in any way or
|
|
||||||
even fielding questions about them. If they don't work for you, please
|
|
||||||
contact Argonne National Labs for a version that will. An e-mail address
|
|
||||||
to try might be monitor-users-request@mcs.anl.gov. An excerpt from
|
|
||||||
a message, received from Argonne, concerning obtaining the macros follows:
|
|
||||||
|
|
||||||
"The parmacs package is in the public domain. Approximately 15 people at
|
|
||||||
Argonne (or associated with Argonne or students) have worked on the
|
|
||||||
parmacs package at one time or another. The parmacs package is
|
|
||||||
implemented via macros using the M4 macropreprocessor (standard on most
|
|
||||||
Unix systems). Current distribution of the software is somewhat ad hoc.
|
|
||||||
Most C versions can be obtained from netlib (send electronic mail to
|
|
||||||
netlib@ornl.gov with the message send index from parmacs). Fortran
|
|
||||||
versions have been emailed directly or sent on tape. The primary
|
|
||||||
documentation for the parmacs package is the book ``Portable Programs for
|
|
||||||
Parallel Processors'' by Lusk, et al, Holt, Rinehart, and Winston 1987."
|
|
||||||
|
|
||||||
The makefiles provided in the individual program directories specify
|
|
||||||
a null macro set that will turn the parallel programs into sequential
|
|
||||||
ones. Note that we do not have a null macro set for FORTRAN.
|
|
||||||
|
|
||||||
|
|
||||||
CODE ENHANCEMENTS:
|
|
||||||
------------------
|
|
||||||
|
|
||||||
All of the codes are designed for shared address space multiprocessors
|
|
||||||
with physically distributed main memory. For these types of machines,
|
|
||||||
process migration and poor data distribution can decrease performance
|
|
||||||
to suboptimal levels. In the applications, comments indicating potential
|
|
||||||
enhancements can be found which will improve performance. Each potential
|
|
||||||
enhancement is denoted by a comment beginning with "POSSIBLE ENHANCEMENT".
|
|
||||||
The potential enhancements which we identify are:
|
|
||||||
|
|
||||||
(1) Data Distribution
|
|
||||||
|
|
||||||
Comments are placed in the code indicating where directives should
|
|
||||||
be placed so that data can be migrated to the local memories of
|
|
||||||
nodes, thus allowing for remote communication to be minimized.
|
|
||||||
|
|
||||||
(2) Process-to-Processor Assignment
|
|
||||||
|
|
||||||
Comments are placed in the code indicating where directives should
|
|
||||||
be placed so that processes can be "pinned" to processors,
|
|
||||||
preventing them from migrating from processor to processor.
|
|
||||||
|
|
||||||
In addition, to facilitate simulation studies, we note points in the
|
|
||||||
codes where statistics gathering routines should be turned on so that
|
|
||||||
cold-start and initialization effects can be avoided.
|
|
||||||
|
|
||||||
As previously mentioned, processes are assumed to be created through calls
|
|
||||||
to a "threads" model creation routine. One important side effect is that
|
|
||||||
this model causes all global variables to be shared (whereas the fork model
|
|
||||||
causes all processes to get their own private copy of global variables).
|
|
||||||
In order to mimic the behavior of global variables in the fork model, many
|
|
||||||
of the applications provide arrays of structures that can be accessed by
|
|
||||||
process ID, such as:
|
|
||||||
|
|
||||||
struct per_process_info {
|
|
||||||
char pad[PAD_LENGTH];
|
|
||||||
unsigned start_time;
|
|
||||||
unsigned end_time;
|
|
||||||
char pad2[PAD_LENGTH];
|
|
||||||
} PPI[MAX_PROCS];
|
|
||||||
|
|
||||||
In these structures, padding is inserted to ensure that the structure
|
|
||||||
information associated with each process can be placed on a different
|
|
||||||
page of memory, and can thus be explicitly migrated to that processor's
|
|
||||||
local memory system. We follow this strategy for certain variables since
|
|
||||||
these data really belong to a process and should be allocated in its local
|
|
||||||
memory. A programming model that had the ability to declare global private
|
|
||||||
data would have automatically ensured that these data were private, and
|
|
||||||
that false sharing did not occur across different structures in the
|
|
||||||
array. However, since the threads model does not provide this capability,
|
|
||||||
it is provided by explicitly introducing arrays of structures with padding.
|
|
||||||
The padding constants used in the programs (PAD_LENGTH in this example)
|
|
||||||
can easily be changed to suit the particular characteristics of a given
|
|
||||||
system. The actual data that is manipulated by individual applications
|
|
||||||
(e.g. grid points, particle data, etc.) is not padded, however.
|
|
||||||
|
|
||||||
Finally, for some applications we provide less-optimized versions of the
|
|
||||||
codes. The less-optimized versions utilize data structures that lead to
|
|
||||||
simpler implementations, but which do not allow for optimal data
|
|
||||||
distribution (and can thus generate false-sharing).
|
|
||||||
|
|
||||||
|
|
||||||
REPORT:
|
|
||||||
-------
|
|
||||||
|
|
||||||
A report will be put together shortly describing the structure, function,
|
|
||||||
and performance characteristics of each application. The report will be
|
|
||||||
similar to the original SPLASH report (see the original report for the
|
|
||||||
issues discussed). The report will provide quantitative data (for two
|
|
||||||
different cache line sizes) for characteristics such as working set size
|
|
||||||
and miss rates (local versus remote, etc.). In addition, the report
|
|
||||||
will discuss cache behavior and synchronization behavior of the
|
|
||||||
applications as well. In the meantime, each application directory has
|
|
||||||
a README file that describes how to run each application. In addition,
|
|
||||||
most applications have comments in their headers describing how to run
|
|
||||||
each application.
|
|
||||||
|
|
||||||
|
|
||||||
README FILES:
|
|
||||||
-------------
|
|
||||||
|
|
||||||
Each application has an associated README file. It is VERY important to
|
|
||||||
read these files carefully, as they discuss the important parameters to
|
|
||||||
supply for each application, as well as other issues involved in running
|
|
||||||
the programs. In each README file, we discuss the impact of explicitly
|
|
||||||
distributing data on the Stanford DASH Multiprocessor. Unless otherwise
|
|
||||||
specified, we assume that the default data distribution mechanism is
|
|
||||||
through round-robin page allocation.
|
|
||||||
|
|
||||||
|
|
||||||
PROBLEM SIZES:
|
|
||||||
--------------
|
|
||||||
|
|
||||||
For each application, the README file describes a recommended problem
|
|
||||||
size that is a reasonable base problem size that can both be simulated
|
|
||||||
and is not too small for reality on a machine with up to 64 processors.
|
|
||||||
For the purposes of studying algorithm performance, the parameters
|
|
||||||
associated with each application can be varied. However, for the
|
|
||||||
purposes of comparing machine architectures, the README files describe
|
|
||||||
which parameters can be varied, and which should remain constant (or at
|
|
||||||
their default values) for comparability. If the specific "base"
|
|
||||||
parameters that are specified are not used, then results which are
|
|
||||||
reported should explicitly state which parameters were changed, what
|
|
||||||
their new values are, and address why they were changed.
|
|
||||||
|
|
||||||
|
|
||||||
CORE PROGRAMS:
|
|
||||||
--------------
|
|
||||||
|
|
||||||
Since the number of programs has increased over SPLASH, and since not
|
|
||||||
everyone may be able to use all the programs in a given study, we
|
|
||||||
identify some of the programs as "core" programs that should be used
|
|
||||||
in most studies for comparability. In the currently available set, these
|
|
||||||
core programs include:
|
|
||||||
|
|
||||||
(1) Ocean Simulation
|
|
||||||
(2) Hierarchical Radiosity
|
|
||||||
(3) Water Simulation with Spatial data structure
|
|
||||||
(4) Barnes-Hut
|
|
||||||
(5) FFT
|
|
||||||
(6) Blocked Sparse Cholesky Factorization
|
|
||||||
(7) Radix Sort
|
|
||||||
|
|
||||||
The less optimized versions of the programs, when provided, should be
|
|
||||||
used only in addition to these.
|
|
||||||
|
|
||||||
|
|
||||||
MAILING LIST:
|
|
||||||
-------------
|
|
||||||
|
|
||||||
Please send a note to splash@mojave.stanford.edu if you have copied over
|
|
||||||
the programs, so that we can put you on a mailing list for update reports.
|
|
||||||
|
|
||||||
|
|
||||||
AUTHORSHIP:
|
|
||||||
-----------
|
|
||||||
|
|
||||||
The applications provided in the SPLASH-2 suite were developed by a number
|
|
||||||
of people. The report lists authors primarily responsible for the
|
|
||||||
development of each application code. The codes were made ready for
|
|
||||||
distribution and the README files were prepared by Steven Cameron Woo and
|
|
||||||
Jaswinder Pal Singh.
|
|
||||||
|
|
||||||
|
|
||||||
CODE CHANGES:
|
|
||||||
-------------
|
|
||||||
|
|
||||||
If modifications are made to the codes which improve their performance,
|
|
||||||
we would like to hear about them. Please send email to
|
|
||||||
splash@mojave.stanford.edu detailing the changes.
|
|
||||||
|
|
||||||
|
|
||||||
UPDATE REPORTS:
|
|
||||||
---------------
|
|
||||||
|
|
||||||
Watch this file for information regarding changes to codes and additions
|
|
||||||
to the application suite.
|
|
||||||
|
|
||||||
|
|
||||||
CHANGES:
|
|
||||||
-------
|
|
||||||
|
|
||||||
10-21-94: Ocean code, contiguous partitions, line 247 of slave1.C changed
|
|
||||||
from
|
|
||||||
|
|
||||||
t2a[0][0] = hh3*t2a[0][0]+hh1*psi[procid][1][0][0];
|
|
||||||
|
|
||||||
to
|
|
||||||
|
|
||||||
t2a[0][0] = hh3*t2a[0][0]+hh1*t2c[0][0];
|
|
||||||
|
|
||||||
This change does not affect correctness; it is an optimization
|
|
||||||
that was performed elsewhere in the code but overlooked here.
|
|
||||||
|
|
||||||
11-01-94: Barnes, file code_io.C, line 55 changed from
|
|
||||||
|
|
||||||
in_real(instr, tnow);
|
|
||||||
|
|
||||||
to
|
|
||||||
|
|
||||||
in_real(instr, &tnow);
|
|
||||||
|
|
||||||
11-01-94: Raytrace, file main.C, lines 216-223 changed from
|
|
||||||
|
|
||||||
if ((pid == 0) || (dostats))
|
|
||||||
CLOCK(end);
|
|
||||||
|
|
||||||
gm->partime[0] = (end - begin) & 0x7FFFFFFF;
|
|
||||||
if (pid == 0) gm->par_start_time = begin;
|
|
||||||
|
|
||||||
/* printf("Process %ld elapsed time %lu.\n", pid, lapsed); */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
to
|
|
||||||
|
|
||||||
if ((pid == 0) || (dostats)) {
|
|
||||||
CLOCK(end);
|
|
||||||
gm->partime[pid] = (end - begin) & 0x7FFFFFFF;
|
|
||||||
if (pid == 0) gm->par_start_time = begin;
|
|
||||||
}
|
|
||||||
|
|
||||||
11-13-94: Raytrace, file memory.C
|
|
||||||
|
|
||||||
The use of the word MAIN_INITENV in a comment in memory.C causes
|
|
||||||
m4 to expand this macro, and some implementations may get confused
|
|
||||||
and generate the wrong C code.
|
|
||||||
|
|
||||||
11-13-94: Radiosity, file rad_main.C
|
|
||||||
|
|
||||||
rad_main.C uses the macro CREATE_LITE. All three instances of
|
|
||||||
CREATE_LITE should be changed to CREATE.
|
|
||||||
|
|
||||||
11-13-94: Water-spatial and Water-nsquared, file makefile
|
|
||||||
|
|
||||||
makefiles were changed so that the compilation phases included the
|
|
||||||
CFLAGS options instead of the CCOPTS options, which did not exist.
|
|
||||||
|
|
||||||
11-17-94: FMM, file particle.C
|
|
||||||
|
|
||||||
Comment regarding data distribution of particle_array data
|
|
||||||
structure is incorrect. Round-robin allocation should be used.
|
|
||||||
|
|
||||||
11-18-94: OCEAN, contiguous partitions, files main.C and linkup.C
|
|
||||||
|
|
||||||
Eliminated a problem which caused non-doubleword aligned
|
|
||||||
accesses to doublewords for the uniprocessor case.
|
|
||||||
|
|
||||||
main.C: Added lines 467-471:
|
|
||||||
|
|
||||||
if (nprocs%2 == 1) { /* To make sure that the actual data
|
|
||||||
starts double word aligned, add an extra
|
|
||||||
pointer */
|
|
||||||
d_size += sizeof(double ***);
|
|
||||||
}
|
|
||||||
|
|
||||||
Added same lines in file linkup.C at line numbers 100 and 159.
|
|
||||||
|
|
||||||
07-30-95: RADIX has been changed. A tree-structured parallel prefix
|
|
||||||
computation is now used instead of a linear one.
|
|
||||||
|
|
||||||
LU has been modified. A comment describing how to distribute
|
|
||||||
data (one of the POSSIBLE ENHANCEMENTS) was incorrect for the
|
|
||||||
contiguous_blocks version of LU. Also, a modification was made
|
|
||||||
that reduces false sharing at line 206 of lu.C:
|
|
||||||
|
|
||||||
last_malloc[i] = (double *) (((unsigned) last_malloc[i]) + PAGE_SIZE -
|
|
||||||
((unsigned) last_malloc[i]) % PAGE_SIZE);
|
|
||||||
|
|
||||||
A subdirectory shmem_files was added under the codes directory.
|
|
||||||
This directory contains a file that can be compiled on SGI machines
|
|
||||||
which replaces the libsgi.a file distributed in the original SPLASH
|
|
||||||
release.
|
|
||||||
|
|
||||||
09-26-95: Fixed a bug in LU. Line 201 was changed from
|
|
||||||
|
|
||||||
last_malloc[i] = (double *) G_MALLOC(proc_bytes[i])
|
|
||||||
|
|
||||||
to
|
|
||||||
|
|
||||||
last_malloc[i] = (double *) G_MALLOC(proc_bytes[i] + PAGE_SIZE)
|
|
||||||
|
|
||||||
Fixed similar bugs in WATER-NSQUARED and WATER-SPATIAL. Both
|
|
||||||
codes needed a barrier added into the mdmain.C files. In both
|
|
||||||
codes, the line
|
|
||||||
|
|
||||||
BARRIER(gl->start, NumProcs);
|
|
||||||
|
|
||||||
was added. In WATER-NSQUARED, it was added in mdmain.C at line
|
|
||||||
84. In WATER-SPATIAL, it was added in mdmain.C at line 107.
|
|
|
@ -1,124 +0,0 @@
|
||||||
We are pleased to announce the release of the SPLASH-2 suite of
|
|
||||||
multiprocessor applications. SPLASH-2 is the successor to the SPLASH
|
|
||||||
suite that we previously released, and the programs in it are also
|
|
||||||
written assuming a coherent shared address space communication model.
|
|
||||||
SPLASH-2 contains several new applications, as well as improved versions
|
|
||||||
of applications from SPLASH. The suite is currently available via
|
|
||||||
anonymous ftp to
|
|
||||||
|
|
||||||
www-flash.stanford.edu (in the pub/splash2 subdirectory)
|
|
||||||
|
|
||||||
and via the World-Wide-Web at
|
|
||||||
|
|
||||||
http://www-flash.stanford.edu/apps/SPLASH/
|
|
||||||
|
|
||||||
Several programs are currently available, and a few others will be added
|
|
||||||
shortly. The programs fall into two categories: full applications and
|
|
||||||
kernels. Additionally, we designate some of these as "core programs"
|
|
||||||
(see below). The applications and kernels currently available in the
|
|
||||||
SPLASH-2 suite include:
|
|
||||||
|
|
||||||
Applications:
|
|
||||||
Ocean Simulation
|
|
||||||
Ray Tracer
|
|
||||||
Hierarchical Radiosity
|
|
||||||
Volume Renderer
|
|
||||||
Water Simulation with Spatial Data Structure
|
|
||||||
Water Simulation without Spatial Data Structure
|
|
||||||
Barnes-Hut (gravitational N-body simulation)
|
|
||||||
Adaptive Fast Multipole (gravitational N-body simulation)
|
|
||||||
|
|
||||||
Kernels:
|
|
||||||
FFT
|
|
||||||
Blocked LU Decomposition
|
|
||||||
Blocked Sparse Cholesky Factorization
|
|
||||||
Radix Sort
|
|
||||||
|
|
||||||
Programs that will appear soon include:
|
|
||||||
|
|
||||||
PSIM4 - Particle Dynamics Simulation (full application)
|
|
||||||
Conjugate Gradient (kernel)
|
|
||||||
LocusRoute (standard cell router from SPLASH)
|
|
||||||
Protein Structure Prediction
|
|
||||||
Protein Sequencing
|
|
||||||
Parallel Probabilistic Inference
|
|
||||||
|
|
||||||
In some cases, we provide both well-optimized and less-optimized versions
|
|
||||||
of the programs. For both the Ocean simulation and the Blocked LU
|
|
||||||
Decomposition kernel, less optimized versions of the codes are currently
|
|
||||||
available.
|
|
||||||
|
|
||||||
There are important differences between applications in the SPLASH-2 suite
|
|
||||||
and applications in the SPLASH suite. These differences are noted in the
|
|
||||||
README.SPLASH2 file in the pub/splash2 directory. It is *VERY IMPORTANT*
|
|
||||||
that you read the README.SPLASH2 file, as well as the individual README
|
|
||||||
files in the program directories, before using the SPLASH-2 programs.
|
|
||||||
These files describe how to run the programs, provide commented annotations
|
|
||||||
about how to distribute data on a machine with physically distributed main
|
|
||||||
memory, and provide guidelines on the baseline problem sizes to use when
|
|
||||||
studying architectural interactions through simulation.
|
|
||||||
|
|
||||||
Complete documentation of SPLASH-2, including a detailed characterization
|
|
||||||
of performance as well as memory system interactions and synchronization
|
|
||||||
behavior, will appear in the SPLASH-2 report that is currently being
|
|
||||||
written.
|
|
||||||
|
|
||||||
|
|
||||||
OPTIMIZATION STRATEGY:
|
|
||||||
----------------------
|
|
||||||
|
|
||||||
For each application and kernel, we note potential features or
|
|
||||||
enhancements that are typically machine-specific. These potential
|
|
||||||
enhancements are encapsulated within comments in the code starting with
|
|
||||||
the string "POSSIBLE ENHANCEMENT". The potential enhancements which we
|
|
||||||
identify are:
|
|
||||||
|
|
||||||
(1) Data Distribution
|
|
||||||
|
|
||||||
We note where data migration routines should be called in order to
|
|
||||||
enhance locality of data access. We do not distribute data by
|
|
||||||
default as different machines implement migration routines in
|
|
||||||
different ways, and on some machines this is not relevant.
|
|
||||||
|
|
||||||
(2) Process-to-Processor Assignment
|
|
||||||
|
|
||||||
We note where calls can be made to "pin" processes to specific
|
|
||||||
processors so that process migration can be avoided. We do not
|
|
||||||
do this by default, since different machines implement this
|
|
||||||
feature in different ways.
|
|
||||||
|
|
||||||
In addition, to facilitate simulation studies, we note points in the
|
|
||||||
codes where statistics gathering routines should be turned on so that
|
|
||||||
cold-start and initialization effects can be avoided.
|
|
||||||
|
|
||||||
For two programs (Ocean and LU), we provide less-optimized versions of
|
|
||||||
the codes. The less-optimized versions utilize data structures that
|
|
||||||
lead to simpler implementations, but which do not allow for optimal data
|
|
||||||
distribution (and can generate false-sharing).
|
|
||||||
|
|
||||||
|
|
||||||
CORE PROGRAMS:
|
|
||||||
--------------
|
|
||||||
|
|
||||||
Since the number of programs has increased over SPLASH, and since not
|
|
||||||
everyone may be able to use all the programs in a given study, we
|
|
||||||
identify some of the programs as "core" programs that should be used
|
|
||||||
in most studies for comparability. In the currently available set,
|
|
||||||
these core programs include:
|
|
||||||
|
|
||||||
(1) Ocean Simulation
|
|
||||||
(2) Hierarchical Radiosity
|
|
||||||
(3) Water Simulation with Spatial data structure
|
|
||||||
(4) Barnes-Hut
|
|
||||||
(5) FFT
|
|
||||||
(6) Blocked Sparse Cholesky Factorization
|
|
||||||
(7) Radix Sort
|
|
||||||
|
|
||||||
The less optimized versions of the programs, when available, should be
|
|
||||||
used only in addition to these.
|
|
||||||
|
|
||||||
The base problem sizes that we recommend are provided in the README files
|
|
||||||
for individual applications. Please use at least these for experiments
|
|
||||||
with up to 64 processors. If changes are made to these base parameters
|
|
||||||
for further experimentation, these changes should be explicitly stated
|
|
||||||
in any results that are presented.
|
|
|
@ -1,34 +0,0 @@
|
||||||
# Shared build settings and rules.  A program makefile is expected to set
# TARGET and OBJS and then include this file (see the per-program makefiles).

CC := gcc
CFLAGS := -static -O3 -pthread -D_POSIX_C_SOURCE=200112
#CFLAGS := -g3 -pthread -D_POSIX_C_SOURCE=200112
CFLAGS := $(CFLAGS) -Wall -W -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls -Wdisabled-optimization
CFLAGS := $(CFLAGS) -Wpadded -Winline -Wpointer-arith -Wsign-compare -Wendif-labels
# libpthread.a and m5op_x86.o are named by relative path, so they have to be
# reachable from the directory in which the link step runs.
LDFLAGS := -lm libpthread.a m5op_x86.o

# Location of the m4 macro set that the .C/.H sources are expanded with.
BASEDIR := $(HOME)/GitSource/gem5/splash2/codes
MACROS := $(BASEDIR)/null_macros/c.m4.null.POSIX
# -s makes m4 emit #line sync directives; -Ulen and -Uindex remove those two
# m4 builtins (presumably because the sources use them as ordinary
# identifiers -- NOTE(review): confirm).
M4 := m4 -s -Ulen -Uindex

# NOTE(review): x is not referenced anywhere in this file; kept as-is.
x = *

$(TARGET): $(OBJS)
	$(CC) $(OBJS) $(CFLAGS) -o $(TARGET) $(LDFLAGS)

# The .c and .h files are generated from .C and .H, which is why clean may
# delete them wholesale.
clean:
	rm -rf *.c *.h *.o $(TARGET)

# clean names an action, not a file: without this declaration a file called
# "clean" in the build directory would make "make clean" do nothing.
.PHONY: clean

# Drop the built-in suffix list, then register only the suffixes used here.
.SUFFIXES:
.SUFFIXES: .o .c .C .h .H

# Header: expand the macros in foo.H to produce foo.h.
.H.h:
	$(M4) $(MACROS) $*.H > $*.h

# Source: expand the macros in foo.C to produce foo.c.
.C.c:
	$(M4) $(MACROS) $*.C > $*.c

# Compile an already expanded source.
.c.o:
	$(CC) -c $(CFLAGS) $*.c

# Expand and compile in one step.
.C.o:
	$(M4) $(MACROS) $*.C > $*.c
	$(CC) -c $(CFLAGS) $*.c
|
|
|
@ -1,13 +0,0 @@
|
||||||
# Build description for the BARNES program.  The compile, link and clean
# rules come from the shared Makefile.config two directories up.
TARGET = BARNES
OBJS = code.o code_io.o load.o \
       grav.o getparam.o util.o

include ../../Makefile.config

# stdinc.h is produced from stdinc.H and must be refreshed whenever any of
# the other headers changes.
stdinc.h: code.h defs.h util.h vectmath.h \
          load.h code_io.h grav.h getparam.h \
          stdinc.H

# Every object is built from its own .C source and needs stdinc.h.
code.o:     code.C     stdinc.h
code_io.o:  code_io.C  stdinc.h
getparam.o: getparam.C stdinc.h
grav.o:     grav.C     stdinc.h
load.o:     load.C     stdinc.h
util.o:     util.C     stdinc.h
|
|
||||||
|
|
|
@ -1,50 +0,0 @@
|
||||||
GENERAL INFORMATION:
|
|
||||||
|
|
||||||
The BARNES application implements the Barnes-Hut method to simulate the
|
|
||||||
interaction of a system of bodies (N-body problem). A general description
|
|
||||||
of the Barnes-Hut method can be found in:
|
|
||||||
|
|
||||||
Singh, J. P. Parallel Hierarchical N-body Methods and Their Implications
|
|
||||||
for Multiprocessors. PhD Thesis, Stanford University, February 1993.
|
|
||||||
|
|
||||||
The SPLASH-2 implementation allows for multiple particles to be stored in
|
|
||||||
each leaf cell of the space partition. A description of this feature
|
|
||||||
can be found in:
|
|
||||||
|
|
||||||
Holt, C. and Singh, J. P. Hierarchical N-Body Methods on Shared Address
|
|
||||||
Space Multiprocessors. SIAM Conference on Parallel Processing
|
|
||||||
for Scientific Computing, Feb 1995, to appear.
|
|
||||||
|
|
||||||
RUNNING THE PROGRAM:
|
|
||||||
|
|
||||||
To see how to run the program, please see the comment at the top of the
|
|
||||||
file code.C, or run the application with the "-h" command line option.
|
|
||||||
The input parameters should be placed in a file and redirected to standard
|
|
||||||
input. Of the twelve input parameters, the ones which would normally be
|
|
||||||
varied are the number of particles and the number of processors. If other
|
|
||||||
parameters are changed, these changes should be reported in any results
|
|
||||||
that are presented.
|
|
||||||
|
|
||||||
The only compile time option, -DQUADPOLE, controls the use of quadrupole
|
|
||||||
interactions during the force computation. For the input parameters
|
|
||||||
provided, the -DQUADPOLE option should not be defined. The constant
|
|
||||||
MAX_BODIES_PER_LEAF defines the maximum number of particles per leaf
|
|
||||||
cell in the tree. This constant also affects the parameter "fleaves" in
|
|
||||||
the input file, which controls how many leaf cells space is allocated for.
|
|
||||||
The higher the value of MAX_BODIES_PER_LEAF, the lower fleaves should be.
|
|
||||||
Both these parameters should be kept at their default values for base
|
|
||||||
SPLASH-2 runs. If changes are made, they should be reported in any results
|
|
||||||
that are presented.
|
|
||||||
|
|
||||||
BASE PROBLEM SIZE:
|
|
||||||
|
|
||||||
The base problem size for a machine with up to 64 processors is 16384 particles.
|
|
||||||
For this many particles, you can use the input file provided (and change
|
|
||||||
only the number of processors).
|
|
||||||
|
|
||||||
DATA DISTRIBUTION:
|
|
||||||
|
|
||||||
Our "POSSIBLE ENHANCEMENT" comments in the source code tell where one
|
|
||||||
might want to distribute data and how. Data distribution, however, does
|
|
||||||
not make much difference to performance on the Stanford DASH
|
|
||||||
multiprocessor.
|
|
|
@ -1,922 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
/*
|
|
||||||
Usage: BARNES <options> < inputfile
|
|
||||||
|
|
||||||
Command line options:
|
|
||||||
|
|
||||||
-h : Print out input file description
|
|
||||||
|
|
||||||
Input parameters should be placed in a file and redirected through
|
|
||||||
standard input. There are a total of twelve parameters, and all of
|
|
||||||
them have default values.
|
|
||||||
|
|
||||||
1) infile (char*) : The name of an input file that contains particle
|
|
||||||
data.
|
|
||||||
|
|
||||||
The format of the file is:
|
|
||||||
a) An int representing the number of particles in the distribution
|
|
||||||
b) An int representing the dimensionality of the problem (3-D)
|
|
||||||
c) A double representing the current time of the simulation
|
|
||||||
d) Doubles representing the masses of all the particles
|
|
||||||
e) A vector (length equal to the dimensionality) of doubles
|
|
||||||
representing the positions of all the particles
|
|
||||||
f) A vector (length equal to the dimensionality) of doubles
|
|
||||||
representing the velocities of all the particles
|
|
||||||
|
|
||||||
Each of these numbers can be separated by any amount of whitespace.
|
|
||||||
2) nbody (int) : If no input file is specified (the first line is
|
|
||||||
blank), this number specifies the number of particles to generate
|
|
||||||
under a plummer model. Default is 16384.
|
|
||||||
3) seed (int) : The seed used by the random number generator.
|
|
||||||
Default is 123.
|
|
||||||
4) outfile (char*) : The name of the file that snapshots will be
|
|
||||||
printed to. This feature has been disabled in the SPLASH release.
|
|
||||||
Default is NULL.
|
|
||||||
5) dtime (double) : The integration time-step.
|
|
||||||
Default is 0.025.
|
|
||||||
6) eps (double) : The usual potential softening
|
|
||||||
Default is 0.05.
|
|
||||||
7) tol (double) : The cell subdivision tolerance.
|
|
||||||
Default is 1.0.
|
|
||||||
8) fcells (double) : Number of cells created = fcells * number of
|
|
||||||
leaves.
|
|
||||||
Default is 2.0.
|
|
||||||
9) fleaves (double) : Number of leaves created = fleaves * nbody.
|
|
||||||
Default is 0.5.
|
|
||||||
10) tstop (double) : The time to stop integration.
|
|
||||||
Default is 0.075.
|
|
||||||
11) dtout (double) : The data-output interval.
|
|
||||||
Default is 0.25.
|
|
||||||
12) NPROC (int) : The number of processors.
|
|
||||||
Default is 1.
|
|
||||||
*/
|
|
||||||
|
|
||||||
MAIN_ENV
|
|
||||||
|
|
||||||
#define global /* nada */
|
|
||||||
|
|
||||||
#include "stdinc.h"
|
|
||||||
|
|
||||||
string defv[] = { /* DEFAULT PARAMETER VALUES */
|
|
||||||
/* file names for input/output */
|
|
||||||
"in=", /* snapshot of initial conditions */
|
|
||||||
"out=", /* stream of output snapshots */
|
|
||||||
|
|
||||||
/* params, used if no input specified, to make a Plummer Model */
|
|
||||||
"nbody=16384", /* number of particles to generate */
|
|
||||||
"seed=123", /* random number generator seed */
|
|
||||||
|
|
||||||
/* params to control N-body integration */
|
|
||||||
"dtime=0.025", /* integration time-step */
|
|
||||||
"eps=0.05", /* usual potential softening */
|
|
||||||
"tol=1.0", /* cell subdivision tolerence */
|
|
||||||
"fcells=2.0", /* cell allocation parameter */
|
|
||||||
"fleaves=0.5", /* leaf allocation parameter */
|
|
||||||
|
|
||||||
"tstop=0.075", /* time to stop integration */
|
|
||||||
"dtout=0.25", /* data-output interval */
|
|
||||||
|
|
||||||
"NPROC=1", /* number of processors */
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
 * The more complicated 3D case.
 *
 * NUM_DIRECTIONS direction codes follow.  Each value is also the row
 * number of that direction in Child_Sequence and Direction_Sequence,
 * so the codes have to stay dense (0 .. NUM_DIRECTIONS-1) and in this
 * order.
 */
#define NUM_DIRECTIONS 32

/* Codes 0-15: names beginning with B. */
#define BRC_FUC  0
#define BRC_FRA  1
#define BRA_FDA  2
#define BRA_FRC  3
#define BLC_FDC  4
#define BLC_FLA  5
#define BLA_FUA  6
#define BLA_FLC  7
#define BUC_FUA  8
#define BUC_FLC  9
#define BUA_FUC 10
#define BUA_FRA 11
#define BDC_FDA 12
#define BDC_FRC 13
#define BDA_FDC 14
#define BDA_FLA 15

/* Codes 16-31: names beginning with F. */
#define FRC_BUC 16
#define FRC_BRA 17
#define FRA_BDA 18
#define FRA_BRC 19
#define FLC_BDC 20
#define FLC_BLA 21
#define FLA_BUA 22
#define FLA_BLC 23
#define FUC_BUA 24
#define FUC_BLC 25
#define FUA_BUC 26
#define FUA_BRA 27
#define FDC_BDA 28
#define FDC_BRC 29
#define FDA_BDC 30
#define FDA_BLA 31
|
|
||||||
|
|
||||||
/*
 * One row per direction code, NSUB entries per row.  Every row below is a
 * permutation of 0..7.  The label in front of a row is the direction code
 * whose value equals that row number.
 */
static long Child_Sequence[NUM_DIRECTIONS][NSUB] =
{
    /* BRC_FUC */ { 2, 5, 6, 1, 0, 3, 4, 7 },
    /* BRC_FRA */ { 2, 5, 6, 1, 0, 7, 4, 3 },
    /* BRA_FDA */ { 1, 6, 5, 2, 3, 0, 7, 4 },
    /* BRA_FRC */ { 1, 6, 5, 2, 3, 4, 7, 0 },
    /* BLC_FDC */ { 6, 1, 2, 5, 4, 7, 0, 3 },
    /* BLC_FLA */ { 6, 1, 2, 5, 4, 3, 0, 7 },
    /* BLA_FUA */ { 5, 2, 1, 6, 7, 4, 3, 0 },
    /* BLA_FLC */ { 5, 2, 1, 6, 7, 0, 3, 4 },
    /* BUC_FUA */ { 1, 2, 5, 6, 7, 4, 3, 0 },
    /* BUC_FLC */ { 1, 2, 5, 6, 7, 0, 3, 4 },
    /* BUA_FUC */ { 6, 5, 2, 1, 0, 3, 4, 7 },
    /* BUA_FRA */ { 6, 5, 2, 1, 0, 7, 4, 3 },
    /* BDC_FDA */ { 5, 6, 1, 2, 3, 0, 7, 4 },
    /* BDC_FRC */ { 5, 6, 1, 2, 3, 4, 7, 0 },
    /* BDA_FDC */ { 2, 1, 6, 5, 4, 7, 0, 3 },
    /* BDA_FLA */ { 2, 1, 6, 5, 4, 3, 0, 7 },

    /* FRC_BUC */ { 3, 4, 7, 0, 1, 2, 5, 6 },
    /* FRC_BRA */ { 3, 4, 7, 0, 1, 6, 5, 2 },
    /* FRA_BDA */ { 0, 7, 4, 3, 2, 1, 6, 5 },
    /* FRA_BRC */ { 0, 7, 4, 3, 2, 5, 6, 1 },
    /* FLC_BDC */ { 7, 0, 3, 4, 5, 6, 1, 2 },
    /* FLC_BLA */ { 7, 0, 3, 4, 5, 2, 1, 6 },
    /* FLA_BUA */ { 4, 3, 0, 7, 6, 5, 2, 1 },
    /* FLA_BLC */ { 4, 3, 0, 7, 6, 1, 2, 5 },
    /* FUC_BUA */ { 0, 3, 4, 7, 6, 5, 2, 1 },
    /* FUC_BLC */ { 0, 3, 4, 7, 6, 1, 2, 5 },
    /* FUA_BUC */ { 7, 4, 3, 0, 1, 2, 5, 6 },
    /* FUA_BRA */ { 7, 4, 3, 0, 1, 6, 5, 2 },
    /* FDC_BDA */ { 4, 7, 0, 3, 2, 1, 6, 5 },
    /* FDC_BRC */ { 4, 7, 0, 3, 2, 5, 6, 1 },
    /* FDA_BDC */ { 3, 0, 7, 4, 5, 6, 1, 2 },
    /* FDA_BLA */ { 3, 0, 7, 4, 5, 2, 1, 6 },
};
|
|
||||||
|
|
||||||
static long Direction_Sequence[NUM_DIRECTIONS][NSUB] =
|
|
||||||
{
|
|
||||||
{ FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
|
|
||||||
/* BRC_FUC */
|
|
||||||
{ FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA, BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC },
|
|
||||||
/* BRC_FRA */
|
|
||||||
{ FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC, BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC },
|
|
||||||
/* BRA_FDA */
|
|
||||||
{ FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
|
|
||||||
/* BRA_FRC */
|
|
||||||
{ FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA, BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA },
|
|
||||||
/* BLC_FDC */
|
|
||||||
{ FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA, BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC },
|
|
||||||
/* BLC_FLA */
|
|
||||||
{ FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC, BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC },
|
|
||||||
/* BLA_FUA */
|
|
||||||
{ FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC, BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA },
|
|
||||||
/* BLA_FLC */
|
|
||||||
{ FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA, BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC },
|
|
||||||
/* BUC_FUA */
|
|
||||||
{ FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA, BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA },
|
|
||||||
/* BUC_FLC */
|
|
||||||
{ FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
|
|
||||||
/* BUA_FUC */
|
|
||||||
{ FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC, BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC },
|
|
||||||
/* BUA_FRA */
|
|
||||||
{ FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA, BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC },
|
|
||||||
/* BDC_FDA */
|
|
||||||
{ FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA, BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA },
|
|
||||||
/* BDC_FRC */
|
|
||||||
{ FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC, BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA },
|
|
||||||
/* BDA_FDC */
|
|
||||||
{ FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC, BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC },
|
|
||||||
/* BDA_FLA */
|
|
||||||
|
|
||||||
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA },
|
|
||||||
/* FRC_BUC */
|
|
||||||
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC },
|
|
||||||
/* FRC_BRA */
|
|
||||||
{ BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC, FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC },
|
|
||||||
/* FRA_BDA */
|
|
||||||
{ BRA_FDA, FRC_BRA, BUC_FUA, FLC_BDC, FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA },
|
|
||||||
/* FRA_BRC */
|
|
||||||
{ BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA, FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA },
|
|
||||||
/* FLC_BDC */
|
|
||||||
{ BLC_FDC, FLA_BLC, BUA_FUC, FRA_BDA, FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC },
|
|
||||||
/* FLC_BLA */
|
|
||||||
{ BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC, FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC },
|
|
||||||
/* FLA_BUA */
|
|
||||||
{ BLA_FUA, FLC_BLA, BDC_FDA, FRC_BUC, FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA },
|
|
||||||
/* FLA_BLC */
|
|
||||||
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FUA_BRA, BUC_FUA, FLC_BLA, BDC_FRC },
|
|
||||||
/* FUC_BUA */
|
|
||||||
{ BUC_FLC, FUA_BUC, BRA_FRC, FDA_BLA, FLC_BDC, BLA_FLC, FUA_BUC, BRA_FDA },
|
|
||||||
/* FUC_BLC */
|
|
||||||
{ BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC, FUC_BLC, BUA_FUC, FRA_BRC, BDA_FLA },
|
|
||||||
/* FUA_BUC */
|
|
||||||
{ BUA_FRA, FUC_BUA, BLC_FLA, FDC_BRC, FRA_BDA, BRC_FRA, FUC_BUA, BLC_FDC },
|
|
||||||
/* FUA_BRA */
|
|
||||||
{ BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA, FDA_BLA, BDC_FDA, FRC_BRA, BUC_FLC },
|
|
||||||
/* FDC_BDA */
|
|
||||||
{ BDC_FRC, FDA_BDC, BLA_FLC, FUA_BRA, FRC_BUC, BRA_FRC, FDA_BDC, BLA_FUA },
|
|
||||||
/* FDC_BRC */
|
|
||||||
{ BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC, FDC_BRC, BDA_FDC, FLA_BLC, BUA_FRA },
|
|
||||||
/* FDA_BDC */
|
|
||||||
{ BDA_FLA, FDC_BDA, BRC_FRA, FUC_BLC, FLA_BUA, BLC_FLA, FDC_BDA, BRC_FUC },
|
|
||||||
/* FDA_BLA */
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
 * MAIN: parse the command line, set the simulation up, run the worker
 * processes and print the timing summary gathered by processor 0.
 */
int main (int argc, string argv[])
{
   long opt;
   unsigned long resttime;

   /* "-h" is the only recognised switch; any option ends the run. */
   while ((opt = getopt(argc, argv, "h")) != -1) {
      if (opt == 'h') {
         Help();
      }
      else {
         fprintf(stderr, "Only valid option is \"-h\".\n");
      }
      exit(-1);
   }

   Global = NULL;
   initparam(defv);
   startrun();
   initoutput();
   tab_init();

   /* Clear the accumulated timers and the ProcessId dispenser. */
   Global->current_id = 0;
   Global->forcecalctime = 0;
   Global->treebuildtime = 0;
   Global->partitiontime = 0;
   Global->tracktime = 0;

   CLOCK(Global->computestart);

   printf("COMPUTESTART = %12lu\n",Global->computestart);

   CREATE(SlaveStart, NPROC);

   WAIT_FOR_END(NPROC);

   CLOCK(Global->computeend);

   printf("COMPUTEEND = %12lu\n",Global->computeend);
   printf("COMPUTETIME = %12lu\n",Global->computeend - Global->computestart);
   printf("TRACKTIME = %12lu\n",Global->tracktime);

   /* Each phase is reported as a raw time and as a fraction of tracktime. */
   printf("PARTITIONTIME = %12lu\t%5.2f\n",Global->partitiontime,
          ((float)Global->partitiontime)/Global->tracktime);
   printf("TREEBUILDTIME = %12lu\t%5.2f\n",Global->treebuildtime,
          ((float)Global->treebuildtime)/Global->tracktime);
   printf("FORCECALCTIME = %12lu\t%5.2f\n",Global->forcecalctime,
          ((float)Global->forcecalctime)/Global->tracktime);

   /* Whatever part of the tracked time is not covered by the three phases. */
   resttime = Global->tracktime - Global->partitiontime -
              Global->treebuildtime - Global->forcecalctime;
   printf("RESTTIME = %12lu\t%5.2f\n",
          resttime,
          ((float)(resttime))/
          Global->tracktime);
   MAIN_END;
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* ANLINIT : initialize ANL macros
|
|
||||||
*/
|
|
||||||
void ANLinit()
|
|
||||||
{
|
|
||||||
MAIN_INITENV(,70000000,);
|
|
||||||
/* Allocate global, shared memory */
|
|
||||||
|
|
||||||
Global = (struct GlobalMemory *) G_MALLOC(sizeof(struct GlobalMemory));
|
|
||||||
if (Global==NULL) error("No initialization for Global\n");
|
|
||||||
|
|
||||||
BARINIT(Global->Barrier, NPROC);
|
|
||||||
|
|
||||||
LOCKINIT(Global->CountLock);
|
|
||||||
LOCKINIT(Global->io_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* INIT_ROOT: Processor 0 reinitializes the global root at each time step
|
|
||||||
*/
|
|
||||||
void init_root()
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
|
|
||||||
Global->G_root=Local[0].ctab;
|
|
||||||
Global->G_root->seqnum = 0;
|
|
||||||
Type(Global->G_root) = CELL;
|
|
||||||
Done(Global->G_root) = FALSE;
|
|
||||||
Level(Global->G_root) = IMAX >> 1;
|
|
||||||
for (i = 0; i < NSUB; i++) {
|
|
||||||
Subp(Global->G_root)[i] = NULL;
|
|
||||||
}
|
|
||||||
Local[0].mynumcell=1;
|
|
||||||
}
|
|
||||||
|
|
||||||
long Log_base_2(long number)
|
|
||||||
{
|
|
||||||
long cumulative;
|
|
||||||
long out;
|
|
||||||
|
|
||||||
cumulative = 1;
|
|
||||||
for (out = 0; out < 20; out++) {
|
|
||||||
if (cumulative == number) {
|
|
||||||
return(out);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
cumulative = cumulative * 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(stderr,"Log_base_2: couldn't find log2 of %ld\n", number);
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* TAB_INIT : allocate body and cell data space
|
|
||||||
*/
|
|
||||||
|
|
||||||
void tab_init()
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
|
|
||||||
/*allocate leaf/cell space */
|
|
||||||
maxleaf = (long) ((double) fleaves * nbody);
|
|
||||||
maxcell = fcells * maxleaf;
|
|
||||||
for (i = 0; i < NPROC; ++i) {
|
|
||||||
Local[i].ctab = (cellptr) G_MALLOC((maxcell / NPROC) * sizeof(cell));
|
|
||||||
Local[i].ltab = (leafptr) G_MALLOC((maxleaf / NPROC) * sizeof(leaf));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*allocate space for personal lists of body pointers */
|
|
||||||
maxmybody = (nbody+maxleaf*MAX_BODIES_PER_LEAF)/NPROC;
|
|
||||||
Local[0].mybodytab = (bodyptr*) G_MALLOC(NPROC*maxmybody*sizeof(bodyptr));
|
|
||||||
/* space is allocated so that every */
|
|
||||||
/* process can have a maximum of maxmybody pointers to bodies */
|
|
||||||
/* then there is an array of bodies called bodytab which is */
|
|
||||||
/* allocated in the distribution generation or when the distr. */
|
|
||||||
/* file is read */
|
|
||||||
maxmycell = maxcell / NPROC;
|
|
||||||
maxmyleaf = maxleaf / NPROC;
|
|
||||||
Local[0].mycelltab = (cellptr*) G_MALLOC(NPROC*maxmycell*sizeof(cellptr));
|
|
||||||
Local[0].myleaftab = (leafptr*) G_MALLOC(NPROC*maxmyleaf*sizeof(leafptr));
|
|
||||||
|
|
||||||
CellLock = (struct CellLockType *) G_MALLOC(sizeof(struct CellLockType));
|
|
||||||
ALOCKINIT(CellLock->CL,MAXLOCK);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SLAVESTART: main task for each processor
|
|
||||||
*/
|
|
||||||
void SlaveStart()
|
|
||||||
{
|
|
||||||
long ProcessId;
|
|
||||||
|
|
||||||
/* Get unique ProcessId */
|
|
||||||
LOCK(Global->CountLock);
|
|
||||||
ProcessId = Global->current_id++;
|
|
||||||
UNLOCK(Global->CountLock);
|
|
||||||
|
|
||||||
BARINCLUDE(Global->Barrier);
|
|
||||||
|
|
||||||
/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
|
|
||||||
processors to avoid migration */
|
|
||||||
|
|
||||||
/* initialize mybodytabs */
|
|
||||||
Local[ProcessId].mybodytab = Local[0].mybodytab + (maxmybody * ProcessId);
|
|
||||||
/* note that every process has its own copy */
|
|
||||||
/* of mybodytab, which was initialized to the */
|
|
||||||
/* beginning of the whole array by proc. 0 */
|
|
||||||
/* before create */
|
|
||||||
Local[ProcessId].mycelltab = Local[0].mycelltab + (maxmycell * ProcessId);
|
|
||||||
Local[ProcessId].myleaftab = Local[0].myleaftab + (maxmyleaf * ProcessId);
|
|
||||||
/* POSSIBLE ENHANCEMENT: Here is where one might distribute the
|
|
||||||
data across physically distributed memories as desired.
|
|
||||||
|
|
||||||
One way to do this is as follows:
|
|
||||||
|
|
||||||
long i;
|
|
||||||
|
|
||||||
if (ProcessId == 0) {
|
|
||||||
for (i=0;i<NPROC;i++) {
|
|
||||||
Place all addresses x such that
|
|
||||||
&(Local[i]) <= x < &(Local[i])+
|
|
||||||
sizeof(struct local_memory) on node i
|
|
||||||
Place all addresses x such that
|
|
||||||
&(Local[i].mybodytab) <= x < &(Local[i].mybodytab)+
|
|
||||||
maxmybody * sizeof(bodyptr) - 1 on node i
|
|
||||||
Place all addresses x such that
|
|
||||||
&(Local[i].mycelltab) <= x < &(Local[i].mycelltab)+
|
|
||||||
maxmycell * sizeof(cellptr) - 1 on node i
|
|
||||||
Place all addresses x such that
|
|
||||||
&(Local[i].myleaftab) <= x < &(Local[i].myleaftab)+
|
|
||||||
maxmyleaf * sizeof(leafptr) - 1 on node i
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
barrier(Global->Barstart,NPROC);
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
Local[ProcessId].tout = Local[0].tout;
|
|
||||||
Local[ProcessId].tnow = Local[0].tnow;
|
|
||||||
Local[ProcessId].nstep = Local[0].nstep;
|
|
||||||
|
|
||||||
find_my_initial_bodies(bodytab, nbody, ProcessId);
|
|
||||||
|
|
||||||
/* main loop */
|
|
||||||
while (Local[ProcessId].tnow < tstop + 0.1 * dtime) {
|
|
||||||
stepsystem(ProcessId);
|
|
||||||
// printtree(Global->G_root);
|
|
||||||
// printf("Going to next step!!!\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* STARTRUN: startup hierarchical N-body code.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void startrun()
|
|
||||||
{
|
|
||||||
long seed;
|
|
||||||
|
|
||||||
infile = getparam("in");
|
|
||||||
if (*infile != '\0'/*NULL*/) {
|
|
||||||
inputdata();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
nbody = getiparam("nbody");
|
|
||||||
if (nbody < 1) {
|
|
||||||
error("startrun: absurd nbody\n");
|
|
||||||
}
|
|
||||||
seed = getiparam("seed");
|
|
||||||
}
|
|
||||||
|
|
||||||
outfile = getparam("out");
|
|
||||||
dtime = getdparam("dtime");
|
|
||||||
dthf = 0.5 * dtime;
|
|
||||||
eps = getdparam("eps");
|
|
||||||
epssq = eps*eps;
|
|
||||||
tol = getdparam("tol");
|
|
||||||
tolsq = tol*tol;
|
|
||||||
fcells = getdparam("fcells");
|
|
||||||
fleaves = getdparam("fleaves");
|
|
||||||
tstop = getdparam("tstop");
|
|
||||||
dtout = getdparam("dtout");
|
|
||||||
NPROC = getiparam("NPROC");
|
|
||||||
Local[0].nstep = 0;
|
|
||||||
pranset(seed);
|
|
||||||
testdata();
|
|
||||||
ANLinit();
|
|
||||||
setbound();
|
|
||||||
Local[0].tout = Local[0].tnow + dtout;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* TESTDATA: generate Plummer model initial conditions for test runs,
|
|
||||||
* scaled to units such that M = -4E = G = 1 (Henon, Heggie, etc).
|
|
||||||
* See Aarseth, SJ, Henon, M, & Wielen, R (1974) Astr & Ap, 37, 183.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define MFRAC 0.999 /* mass cut off at MFRAC of total */
|
|
||||||
|
|
||||||
void testdata()
|
|
||||||
{
|
|
||||||
real rsc, vsc, r, v, x, y;
|
|
||||||
vector cmr, cmv;
|
|
||||||
register bodyptr p;
|
|
||||||
long rejects = 0;
|
|
||||||
long halfnbody, i;
|
|
||||||
float offset;
|
|
||||||
register bodyptr cp;
|
|
||||||
|
|
||||||
headline = "Hack code: Plummer model";
|
|
||||||
Local[0].tnow = 0.0;
|
|
||||||
bodytab = (bodyptr) G_MALLOC(nbody * sizeof(body));
|
|
||||||
if (bodytab == NULL) {
|
|
||||||
error("testdata: not enough memory\n");
|
|
||||||
}
|
|
||||||
rsc = 9 * PI / 16;
|
|
||||||
vsc = sqrt(1.0 / rsc);
|
|
||||||
|
|
||||||
CLRV(cmr);
|
|
||||||
CLRV(cmv);
|
|
||||||
|
|
||||||
halfnbody = nbody / 2;
|
|
||||||
if (nbody % 2 != 0) halfnbody++;
|
|
||||||
for (p = bodytab; p < bodytab+halfnbody; p++) {
|
|
||||||
Type(p) = BODY;
|
|
||||||
Mass(p) = 1.0 / nbody;
|
|
||||||
Cost(p) = 1;
|
|
||||||
|
|
||||||
r = 1 / sqrt(pow(xrand(0.0, MFRAC), -2.0/3.0) - 1);
|
|
||||||
/* reject radii greater than 10 */
|
|
||||||
while (r > 9.0) {
|
|
||||||
rejects++;
|
|
||||||
r = 1 / sqrt(pow(xrand(0.0, MFRAC), -2.0/3.0) - 1);
|
|
||||||
}
|
|
||||||
pickshell(Pos(p), rsc * r);
|
|
||||||
ADDV(cmr, cmr, Pos(p));
|
|
||||||
do {
|
|
||||||
x = xrand(0.0, 1.0);
|
|
||||||
y = xrand(0.0, 0.1);
|
|
||||||
|
|
||||||
} while (y > x*x * pow(1 - x*x, 3.5));
|
|
||||||
|
|
||||||
v = sqrt(2.0) * x / pow(1 + r*r, 0.25);
|
|
||||||
pickshell(Vel(p), vsc * v);
|
|
||||||
ADDV(cmv, cmv, Vel(p));
|
|
||||||
}
|
|
||||||
|
|
||||||
offset = 4.0;
|
|
||||||
|
|
||||||
for (p = bodytab + halfnbody; p < bodytab+nbody; p++) {
|
|
||||||
Type(p) = BODY;
|
|
||||||
Mass(p) = 1.0 / nbody;
|
|
||||||
Cost(p) = 1;
|
|
||||||
|
|
||||||
cp = p - halfnbody;
|
|
||||||
for (i = 0; i < NDIM; i++){
|
|
||||||
Pos(p)[i] = Pos(cp)[i] + offset;
|
|
||||||
Vel(p)[i] = Vel(cp)[i];
|
|
||||||
}
|
|
||||||
ADDV(cmr, cmr, Pos(p));
|
|
||||||
ADDV(cmv, cmv, Vel(p));
|
|
||||||
}
|
|
||||||
|
|
||||||
DIVVS(cmr, cmr, (real) nbody);
|
|
||||||
DIVVS(cmv, cmv, (real) nbody);
|
|
||||||
|
|
||||||
for (p = bodytab; p < bodytab+nbody; p++) {
|
|
||||||
SUBV(Pos(p), Pos(p), cmr);
|
|
||||||
SUBV(Vel(p), Vel(p), cmv);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* PICKSHELL: pick a random point on a sphere of specified radius.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void pickshell(real vec[], real rad)
{
   register long axis;
   double rsq, scale;

   /* Draw points in the cube [-1,1]^NDIM until one has a squared
      length that does not exceed 1. */
   for (;;) {
      axis = 0;
      while (axis < NDIM) {
         vec[axis] = xrand(-1.0, 1.0);
         axis++;
      }
      DOTVP(rsq, vec, vec);
      if (!(rsq > 1.0)) {
         break;
      }
   }

   /* Rescale the accepted point so that its length becomes rad. */
   scale = rad / sqrt(rsq);
   MULVS(vec, vec, scale);
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * INTPOW: return i raised to the power j by repeated multiplication.
 * The result is 1 whenever j is zero or negative.
 */
long intpow(long i, long j)
{
   long result = 1;
   long remaining = j;

   while (remaining > 0) {
      result *= i;
      remaining--;
   }
   return result;
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* STEPSYSTEM: advance N-body system one time-step.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void stepsystem(long ProcessId)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
real Cavg;
|
|
||||||
bodyptr p,*pp;
|
|
||||||
vector dvel, vel1, dpos;
|
|
||||||
long trackstart, trackend;
|
|
||||||
long partitionstart, partitionend;
|
|
||||||
long treebuildstart, treebuildend;
|
|
||||||
long forcecalcstart, forcecalcend;
|
|
||||||
|
|
||||||
if (Local[ProcessId].nstep == 2) {
|
|
||||||
/* POSSIBLE ENHANCEMENT: Here is where one might reset the
|
|
||||||
statistics that one is measuring about the parallel execution */
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
||||||
CLOCK(trackstart);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ProcessId == 0) {
|
|
||||||
init_root();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
Local[ProcessId].mynumcell = 0;
|
|
||||||
Local[ProcessId].mynumleaf = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* start at same time */
|
|
||||||
BARRIER(Global->Barrier,NPROC);
|
|
||||||
|
|
||||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
||||||
CLOCK(treebuildstart);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* load bodies into tree */
|
|
||||||
maketree(ProcessId);
|
|
||||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
||||||
CLOCK(treebuildend);
|
|
||||||
Global->treebuildtime += treebuildend - treebuildstart;
|
|
||||||
}
|
|
||||||
|
|
||||||
Housekeep(ProcessId);
|
|
||||||
|
|
||||||
Cavg = (real) Cost(Global->G_root) / (real)NPROC ;
|
|
||||||
Local[ProcessId].workMin = (long) (Cavg * ProcessId);
|
|
||||||
Local[ProcessId].workMax = (long) (Cavg * (ProcessId + 1)
|
|
||||||
+ (ProcessId == (NPROC - 1)));
|
|
||||||
|
|
||||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
||||||
CLOCK(partitionstart);
|
|
||||||
}
|
|
||||||
|
|
||||||
Local[ProcessId].mynbody = 0;
|
|
||||||
find_my_bodies(Global->G_root, 0, BRC_FUC, ProcessId );
|
|
||||||
|
|
||||||
/* B*RRIER(Global->Barcom,NPROC); */
|
|
||||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
||||||
CLOCK(partitionend);
|
|
||||||
Global->partitiontime += partitionend - partitionstart;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
||||||
CLOCK(forcecalcstart);
|
|
||||||
}
|
|
||||||
|
|
||||||
ComputeForces(ProcessId);
|
|
||||||
|
|
||||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
||||||
CLOCK(forcecalcend);
|
|
||||||
Global->forcecalctime += forcecalcend - forcecalcstart;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* advance my bodies */
|
|
||||||
for (pp = Local[ProcessId].mybodytab;
|
|
||||||
pp < Local[ProcessId].mybodytab+Local[ProcessId].mynbody; pp++) {
|
|
||||||
p = *pp;
|
|
||||||
MULVS(dvel, Acc(p), dthf);
|
|
||||||
ADDV(vel1, Vel(p), dvel);
|
|
||||||
MULVS(dpos, vel1, dtime);
|
|
||||||
ADDV(Pos(p), Pos(p), dpos);
|
|
||||||
ADDV(Vel(p), vel1, dvel);
|
|
||||||
|
|
||||||
for (i = 0; i < NDIM; i++) {
|
|
||||||
if (Pos(p)[i]<Local[ProcessId].min[i]) {
|
|
||||||
Local[ProcessId].min[i]=Pos(p)[i];
|
|
||||||
}
|
|
||||||
if (Pos(p)[i]>Local[ProcessId].max[i]) {
|
|
||||||
Local[ProcessId].max[i]=Pos(p)[i] ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LOCK(Global->CountLock);
|
|
||||||
for (i = 0; i < NDIM; i++) {
|
|
||||||
if (Global->min[i] > Local[ProcessId].min[i]) {
|
|
||||||
Global->min[i] = Local[ProcessId].min[i];
|
|
||||||
}
|
|
||||||
if (Global->max[i] < Local[ProcessId].max[i]) {
|
|
||||||
Global->max[i] = Local[ProcessId].max[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
UNLOCK(Global->CountLock);
|
|
||||||
|
|
||||||
/* bar needed to make sure that every process has computed its min */
|
|
||||||
/* and max coordinates, and has accumulated them into the global */
|
|
||||||
/* min and max, before the new dimensions are computed */
|
|
||||||
BARRIER(Global->Barrier,NPROC);
|
|
||||||
|
|
||||||
if ((ProcessId == 0) && (Local[ProcessId].nstep >= 2)) {
|
|
||||||
CLOCK(trackend);
|
|
||||||
Global->tracktime += trackend - trackstart;
|
|
||||||
}
|
|
||||||
if (ProcessId==0) {
|
|
||||||
Global->rsize=0;
|
|
||||||
SUBV(Global->max,Global->max,Global->min);
|
|
||||||
for (i = 0; i < NDIM; i++) {
|
|
||||||
if (Global->rsize < Global->max[i]) {
|
|
||||||
Global->rsize = Global->max[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ADDVS(Global->rmin,Global->min,-Global->rsize/100000.0);
|
|
||||||
Global->rsize = 1.00002*Global->rsize;
|
|
||||||
SETVS(Global->min,1E99);
|
|
||||||
SETVS(Global->max,-1E99);
|
|
||||||
}
|
|
||||||
Local[ProcessId].nstep++;
|
|
||||||
Local[ProcessId].tnow = Local[ProcessId].tnow + dtime;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void ComputeForces(long ProcessId)
|
|
||||||
{
|
|
||||||
bodyptr p,*pp;
|
|
||||||
vector acc1, dacc, dvel;
|
|
||||||
|
|
||||||
for (pp = Local[ProcessId].mybodytab;
|
|
||||||
pp < Local[ProcessId].mybodytab+Local[ProcessId].mynbody;pp++) {
|
|
||||||
p = *pp;
|
|
||||||
SETV(acc1, Acc(p));
|
|
||||||
Cost(p)=0;
|
|
||||||
hackgrav(p,ProcessId);
|
|
||||||
Local[ProcessId].myn2bcalc += Local[ProcessId].myn2bterm;
|
|
||||||
Local[ProcessId].mynbccalc += Local[ProcessId].mynbcterm;
|
|
||||||
if (!Local[ProcessId].skipself) { /* did we miss self-int? */
|
|
||||||
Local[ProcessId].myselfint++; /* count another goofup */
|
|
||||||
}
|
|
||||||
if (Local[ProcessId].nstep > 0) {
|
|
||||||
/* use change in accel to make 2nd order correction to vel */
|
|
||||||
SUBV(dacc, Acc(p), acc1);
|
|
||||||
MULVS(dvel, dacc, dthf);
|
|
||||||
ADDV(Vel(p), Vel(p), dvel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* FIND_MY_INITIAL_BODIES: puts into mybodytab the initial list of bodies
|
|
||||||
* assigned to the processor.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void find_my_initial_bodies(bodyptr btab, long nbody, long ProcessId)
{
   long share, extra, first, i;

   /* nbody bodies are dealt out in contiguous runs: the first `extra`
      processors receive one body more than the others. */
   share = nbody / NPROC;
   extra = nbody % NPROC;
   if (ProcessId >= extra) {
      first = (share + 1) * extra + (ProcessId - extra) * share;
   }
   else {
      share++;
      first = share * ProcessId;
   }
   Local[ProcessId].mynbody = share;

   for (i = 0; i < Local[ProcessId].mynbody; i++) {
      Local[ProcessId].mybodytab[i] = btab + first + i;
   }

   BARRIER(Global->Barrier,NPROC);
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * FIND_MY_BODIES: walk the tree below mycell and append to this processor's
 * mybodytab the bodies whose running cost lies in its [workMin, workMax)
 * window.  `work` is the cost accumulated before mycell; `direction` selects
 * the row of Child_Sequence / Direction_Sequence used to order the children.
 */
void find_my_bodies(nodeptr mycell, long work, long direction, long ProcessId)
{
   long slot;
   leafptr l;
   nodeptr qptr;

   if (Type(mycell) != LEAF) {
      /* Internal cell: visit the children in sequence order until the
         upper end of the window has been reached. */
      for (slot = 0; slot < NSUB; slot++) {
         if (!(work < (Local[ProcessId].workMax - .1))) {
            break;
         }
         qptr = Subp(mycell)[Child_Sequence[direction][slot]];
         if (qptr == NULL) {
            continue;
         }
         /* Descend only if this subtree reaches into the window. */
         if ((work+Cost(qptr)) >= (Local[ProcessId].workMin -.1)) {
            find_my_bodies(qptr,work, Direction_Sequence[direction][slot],
                           ProcessId);
         }
         work += Cost(qptr);
      }
      return;
   }

   /* Leaf: take each body from the point where the window starts. */
   l = (leafptr) mycell;
   for (slot = 0; slot < l->num_bodies; slot++) {
      if (work >= Local[ProcessId].workMin - .1) {
         if ((Local[ProcessId].mynbody+2) > maxmybody) {
            error("find_my_bodies: Processor %ld needs more than %ld bodies; increase fleaves\n", ProcessId, maxmybody);
         }
         Local[ProcessId].mybodytab[Local[ProcessId].mynbody++] =
            Bodyp(l)[slot];
      }
      work += Cost(Bodyp(l)[slot]);
      if (work >= Local[ProcessId].workMax-.1) {
         break;
      }
   }
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* HOUSEKEEP: reinitialize the different variables (in particular global
|
|
||||||
* variables) between each time step.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void Housekeep(long ProcessId)
|
|
||||||
{
|
|
||||||
Local[ProcessId].myn2bcalc = Local[ProcessId].mynbccalc
|
|
||||||
= Local[ProcessId].myselfint = 0;
|
|
||||||
SETVS(Local[ProcessId].min,1E99);
|
|
||||||
SETVS(Local[ProcessId].max,-1E99);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SETBOUND: Compute the initial size of the root of the tree; only done
|
|
||||||
* before first time step, and only processor 0 does it
|
|
||||||
*/
|
|
||||||
void setbound()
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
real side ;
|
|
||||||
bodyptr p;
|
|
||||||
|
|
||||||
SETVS(Local[0].min,1E99);
|
|
||||||
SETVS(Local[0].max,-1E99);
|
|
||||||
side=0;
|
|
||||||
|
|
||||||
for (p = bodytab; p < bodytab+nbody; p++) {
|
|
||||||
for (i=0; i<NDIM;i++) {
|
|
||||||
if (Pos(p)[i]<Local[0].min[i]) Local[0].min[i]=Pos(p)[i] ;
|
|
||||||
if (Pos(p)[i]>Local[0].max[i]) Local[0].max[i]=Pos(p)[i] ;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
SUBV(Local[0].max,Local[0].max,Local[0].min);
|
|
||||||
for (i=0; i<NDIM;i++) if (side<Local[0].max[i]) side=Local[0].max[i];
|
|
||||||
ADDVS(Global->rmin,Local[0].min,-side/100000.0);
|
|
||||||
Global->rsize = 1.00002*side;
|
|
||||||
SETVS(Global->max,-1E99);
|
|
||||||
SETVS(Global->min,1E99);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * HELP: print the description of the twelve run parameters on stdout.
 */
void Help()
{
   /* The text is written verbatim, one entry after another. */
   static const char *const help_text[] = {
      "There are a total of twelve parameters, and all of them have default values.\n",
      "\n",
      "1) infile (char*) : The name of an input file that contains particle data. \n",
      " The format of the file is:\n",
      "\ta) An int representing the number of particles in the distribution\n",
      "\tb) An int representing the dimensionality of the problem (3-D)\n",
      "\tc) A double representing the current time of the simulation\n",
      "\td) Doubles representing the masses of all the particles\n",
      "\te) A vector (length equal to the dimensionality) of doubles\n",
      "\t representing the positions of all the particles\n",
      "\tf) A vector (length equal to the dimensionality) of doubles\n",
      "\t representing the velocities of all the particles\n",
      "\n",
      " Each of these numbers can be separated by any amount of whitespace.\n",
      "\n",
      "2) nbody (int) : If no input file is specified (the first line is blank), this\n",
      " number specifies the number of particles to generate under a plummer model.\n",
      " Default is 16384.\n",
      "\n",
      "3) seed (int) : The seed used by the random number generator.\n",
      " Default is 123.\n",
      "\n",
      "4) outfile (char*) : The name of the file that snapshots will be printed to. \n",
      " This feature has been disabled in the SPLASH release.\n",
      " Default is NULL.\n",
      "\n",
      "5) dtime (double) : The integration time-step.\n",
      " Default is 0.025.\n",
      "\n",
      "6) eps (double) : The usual potential softening\n",
      " Default is 0.05.\n",
      "\n",
      "7) tol (double) : The cell subdivision tolerance.\n",
      " Default is 1.0.\n",
      "\n",
      "8) fcells (double) : The total number of cells created is equal to \n",
      " fcells * number of leaves.\n",
      " Default is 2.0.\n",
      "\n",
      "9) fleaves (double) : The total number of leaves created is equal to \n",
      " fleaves * nbody.\n",
      " Default is 0.5.\n",
      "\n",
      "10) tstop (double) : The time to stop integration.\n",
      " Default is 0.075.\n",
      "\n",
      "11) dtout (double) : The data-output interval.\n",
      " Default is 0.25.\n",
      "\n",
      "12) NPROC (int) : The number of processors.\n",
      " Default is 1.\n",
   };
   long entry;

   for (entry = 0;
        entry < (long) (sizeof(help_text) / sizeof(help_text[0]));
        entry++) {
      fputs(help_text[entry], stdout);
   }
}
|
|
|
@ -1,155 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* CODE.H: define various global things for CODE.C.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _CODE_H_
|
|
||||||
#define _CODE_H_
|
|
||||||
|
|
||||||
#define PAD_SIZE (PAGE_SIZE / (sizeof(long)))
|
|
||||||
|
|
||||||
/* Defined by the input file */
|
|
||||||
global string headline; /* message describing calculation */
|
|
||||||
global string infile; /* file name for snapshot input */
|
|
||||||
global string outfile; /* file name for snapshot output */
|
|
||||||
global real dtime; /* timestep for leapfrog integrator */
|
|
||||||
global real dtout; /* time between data outputs */
|
|
||||||
global real tstop; /* time to stop calculation */
|
|
||||||
global long nbody; /* number of bodies in system */
|
|
||||||
global real fcells; /* ratio of cells/leaves allocated */
|
|
||||||
global real fleaves; /* ratio of leaves/bodies allocated */
|
|
||||||
global real tol; /* accuracy parameter: 0.0 => exact */
|
|
||||||
global real tolsq; /* square of previous */
|
|
||||||
global real eps; /* potential softening parameter */
|
|
||||||
global real epssq; /* square of previous */
|
|
||||||
global real dthf; /* half time step */
|
|
||||||
global long NPROC; /* Number of Processors */
|
|
||||||
|
|
||||||
global long maxcell; /* max number of cells allocated */
|
|
||||||
global long maxleaf; /* max number of leaves allocated */
|
|
||||||
global long maxmybody; /* max no. of bodies allocated per processor */
|
|
||||||
global long maxmycell; /* max num. of cells to be allocated */
|
|
||||||
global long maxmyleaf; /* max num. of leaves to be allocated */
|
|
||||||
global bodyptr bodytab; /* array size is exactly nbody bodies */
|
|
||||||
|
|
||||||
/* Holder for an array of MAXLOCK locks, reached through the CellLock
 * pointer; tab_init() in code.C allocates it and runs ALOCKINIT on CL. */
global struct CellLockType {
|
|
||||||
ALOCKDEC(CL,MAXLOCK) /* locks on the cells*/
|
|
||||||
} *CellLock;
|
|
||||||
|
|
||||||
struct GlobalMemory { /* all this info is for the whole system */
|
|
||||||
long n2bcalc; /* total number of body/cell interactions */
|
|
||||||
long nbccalc; /* total number of body/body interactions */
|
|
||||||
long selfint; /* number of self interactions */
|
|
||||||
real mtot; /* total mass of N-body system */
|
|
||||||
real etot[3]; /* binding, kinetic, potential energy */
|
|
||||||
matrix keten; /* kinetic energy tensor */
|
|
||||||
matrix peten; /* potential energy tensor */
|
|
||||||
vector cmphase[2]; /* center of mass coordinates and velocity */
|
|
||||||
vector amvec; /* angular momentum vector */
|
|
||||||
cellptr G_root; /* root of the whole tree */
|
|
||||||
vector rmin; /* lower-left corner of coordinate box */
|
|
||||||
vector min; /* temporary lower-left corner of the box */
|
|
||||||
vector max; /* temporary upper right corner of the box */
|
|
||||||
real rsize; /* side-length of integer coordinate box */
|
|
||||||
BARDEC(Barrier) /* barrier at the beginning of stepsystem */
|
|
||||||
LOCKDEC(CountLock) /* Lock on the shared variables */
|
|
||||||
LOCKDEC(NcellLock) /* Lock on the counter of array of cells for loadtree */
|
|
||||||
LOCKDEC(NleafLock)/* Lock on the counter of array of leaves for loadtree */
|
|
||||||
LOCKDEC(io_lock)
|
|
||||||
unsigned long createstart,createend,computestart,computeend;
|
|
||||||
unsigned long trackstart, trackend, tracktime;
|
|
||||||
unsigned long partitionstart, partitionend, partitiontime;
|
|
||||||
unsigned long treebuildstart, treebuildend, treebuildtime;
|
|
||||||
unsigned long forcecalcstart, forcecalcend, forcecalctime;
|
|
||||||
long current_id;
|
|
||||||
volatile long k; /*for memory allocation in code.C */
|
|
||||||
};
|
|
||||||
global struct GlobalMemory *Global;
|
|
||||||
|
|
||||||
/* This structure is needed because under the sproc model there is no
|
|
||||||
* per processor private address space.
|
|
||||||
*/
|
|
||||||
struct local_memory {
|
|
||||||
/* Use padding so that each processor's variables are on their own page */
|
|
||||||
long pad_begin[PAD_SIZE];
|
|
||||||
|
|
||||||
real tnow; /* current value of simulation time */
|
|
||||||
real tout; /* time next output is due */
|
|
||||||
long nstep; /* number of integration steps so far */
|
|
||||||
|
|
||||||
long workMin, workMax;/* interval of cost to be treated by a proc */
|
|
||||||
|
|
||||||
vector min, max; /* min and max of coordinates for each Proc. */
|
|
||||||
|
|
||||||
long mynumcell; /* num. of cells used for this proc in ctab */
|
|
||||||
long mynumleaf; /* num. of leaves used for this proc in ctab */
|
|
||||||
long mynbody; /* num bodies allocated to the processor */
|
|
||||||
bodyptr* mybodytab; /* array of bodies allocated / processor */
|
|
||||||
long myncell; /* num cells allocated to the processor */
|
|
||||||
cellptr* mycelltab; /* array of cellptrs allocated to the processor */
|
|
||||||
long mynleaf; /* number of leaves allocated to the processor */
|
|
||||||
leafptr* myleaftab; /* array of leafptrs allocated to the processor */
|
|
||||||
cellptr ctab; /* array of cells used for the tree. */
|
|
||||||
leafptr ltab; /* array of cells used for the tree. */
|
|
||||||
|
|
||||||
long myn2bcalc; /* body-body force calculations for each processor */
|
|
||||||
long mynbccalc; /* body-cell force calculations for each processor */
|
|
||||||
long myselfint; /* count self-interactions for each processor */
|
|
||||||
long myn2bterm; /* count body-body terms for a body */
|
|
||||||
long mynbcterm; /* count body-cell terms for a body */
|
|
||||||
bool skipself; /* true if self-interaction skipped OK */
|
|
||||||
bodyptr pskip; /* body to skip in force evaluation */
|
|
||||||
vector pos0; /* point at which to evaluate field */
|
|
||||||
real phi0; /* computed potential at pos0 */
|
|
||||||
vector acc0; /* computed acceleration at pos0 */
|
|
||||||
vector dr; /* data to be shared */
|
|
||||||
real drsq; /* between gravsub and subdivp */
|
|
||||||
nodeptr pmem; /* remember particle data */
|
|
||||||
|
|
||||||
nodeptr Current_Root;
|
|
||||||
long Root_Coords[NDIM];
|
|
||||||
|
|
||||||
real mymtot; /* total mass of N-body system */
|
|
||||||
real myetot[3]; /* binding, kinetic, potential energy */
|
|
||||||
matrix myketen; /* kinetic energy tensor */
|
|
||||||
matrix mypeten; /* potential energy tensor */
|
|
||||||
vector mycmphase[2]; /* center of mass coordinates */
|
|
||||||
vector myamvec; /* angular momentum vector */
|
|
||||||
|
|
||||||
long pad_end[PAD_SIZE];
|
|
||||||
};
|
|
||||||
global struct local_memory Local[MAX_PROC];
|
|
||||||
|
|
||||||
void SlaveStart(void);
|
|
||||||
void stepsystem(long ProcessId);
|
|
||||||
void ComputeForces(long ProcessId);
|
|
||||||
void Help(void);
|
|
||||||
void ANLinit(void);
|
|
||||||
void init_root(void);
|
|
||||||
void tab_init(void);
|
|
||||||
void startrun(void);
|
|
||||||
void testdata(void);
|
|
||||||
void pickshell(real vec[], real rad);
|
|
||||||
void find_my_initial_bodies(bodyptr btab, long nbody, long ProcessId);
|
|
||||||
void find_my_bodies(nodeptr mycell, long work, long direction, long ProcessId);
|
|
||||||
void Housekeep(long ProcessId);
|
|
||||||
void setbound(void);
|
|
||||||
long Log_base_2(long number);
|
|
||||||
long intpow(long i, long j);
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,235 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* CODE_IO.C:
|
|
||||||
*/
|
|
||||||
EXTERN_ENV
|
|
||||||
#define global extern
|
|
||||||
|
|
||||||
#include "stdinc.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* INPUTDATA: read initial conditions from input file.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void inputdata ()
|
|
||||||
{
|
|
||||||
stream instr;
|
|
||||||
permanent char headbuf[128];
|
|
||||||
long ndim;
|
|
||||||
real tnow;
|
|
||||||
bodyptr p;
|
|
||||||
long i;
|
|
||||||
|
|
||||||
fprintf(stderr,"reading input file : %s\n",infile);
|
|
||||||
fflush(stderr);
|
|
||||||
instr = fopen(infile, "r");
|
|
||||||
if (instr == NULL)
|
|
||||||
error("inputdata: cannot find file %s\n", infile);
|
|
||||||
sprintf(headbuf, "Hack code: input file %s\n", infile);
|
|
||||||
headline = headbuf;
|
|
||||||
in_int(instr, &nbody);
|
|
||||||
if (nbody < 1)
|
|
||||||
error("inputdata: nbody = %ld is absurd\n", nbody);
|
|
||||||
in_int(instr, &ndim);
|
|
||||||
if (ndim != NDIM)
|
|
||||||
error("inputdata: NDIM = %ld ndim = %ld is absurd\n", NDIM, ndim);
|
|
||||||
in_real(instr, &tnow);
|
|
||||||
for (i = 0; i < MAX_PROC; i++) {
|
|
||||||
Local[i].tnow = tnow;
|
|
||||||
}
|
|
||||||
bodytab = (bodyptr) G_MALLOC(nbody * sizeof(body));
|
|
||||||
if (bodytab == NULL)
|
|
||||||
error("inputdata: not enuf memory\n");
|
|
||||||
for (p = bodytab; p < bodytab+nbody; p++) {
|
|
||||||
Type(p) = BODY;
|
|
||||||
Cost(p) = 1;
|
|
||||||
Phi(p) = 0.0;
|
|
||||||
CLRV(Acc(p));
|
|
||||||
}
|
|
||||||
for (p = bodytab; p < bodytab+nbody; p++)
|
|
||||||
in_real(instr, &Mass(p));
|
|
||||||
for (p = bodytab; p < bodytab+nbody; p++)
|
|
||||||
in_vector(instr, Pos(p));
|
|
||||||
for (p = bodytab; p < bodytab+nbody; p++)
|
|
||||||
in_vector(instr, Vel(p));
|
|
||||||
fclose(instr);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* INITOUTPUT: initialize output routines.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
void initoutput()
|
|
||||||
{
|
|
||||||
printf("\n\t\t%s\n\n", headline);
|
|
||||||
printf("%10s%10s%10s%10s%10s%10s%10s%10s\n",
|
|
||||||
"nbody", "dtime", "eps", "tol", "dtout", "tstop","fcells","NPROC");
|
|
||||||
printf("%10ld%10.5f%10.4f%10.2f%10.3f%10.3f%10.2f%10ld\n\n",
|
|
||||||
nbody, dtime, eps, tol, dtout, tstop, fcells, NPROC);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* STOPOUTPUT: finish up after a run.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* OUTPUT: compute diagnostics and output data.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void output(long ProcessId)
|
|
||||||
{
|
|
||||||
long nttot, nbavg, ncavg,k;
|
|
||||||
vector tempv1,tempv2;
|
|
||||||
|
|
||||||
if ((Local[ProcessId].tout - 0.01 * dtime) <= Local[ProcessId].tnow) {
|
|
||||||
Local[ProcessId].tout += dtout;
|
|
||||||
}
|
|
||||||
|
|
||||||
diagnostics(ProcessId);
|
|
||||||
|
|
||||||
if (Local[ProcessId].mymtot!=0) {
|
|
||||||
LOCK(Global->CountLock);
|
|
||||||
Global->n2bcalc += Local[ProcessId].myn2bcalc;
|
|
||||||
Global->nbccalc += Local[ProcessId].mynbccalc;
|
|
||||||
Global->selfint += Local[ProcessId].myselfint;
|
|
||||||
ADDM(Global->keten, Global-> keten, Local[ProcessId].myketen);
|
|
||||||
ADDM(Global->peten, Global-> peten, Local[ProcessId].mypeten);
|
|
||||||
for (k=0;k<3;k++) Global->etot[k] += Local[ProcessId].myetot[k];
|
|
||||||
ADDV(Global->amvec, Global-> amvec, Local[ProcessId].myamvec);
|
|
||||||
|
|
||||||
MULVS(tempv1, Global->cmphase[0],Global->mtot);
|
|
||||||
MULVS(tempv2, Local[ProcessId].mycmphase[0], Local[ProcessId].mymtot);
|
|
||||||
ADDV(tempv1, tempv1, tempv2);
|
|
||||||
DIVVS(Global->cmphase[0], tempv1, Global->mtot+Local[ProcessId].mymtot);
|
|
||||||
|
|
||||||
MULVS(tempv1, Global->cmphase[1],Global->mtot);
|
|
||||||
MULVS(tempv2, Local[ProcessId].mycmphase[1], Local[ProcessId].mymtot);
|
|
||||||
ADDV(tempv1, tempv1, tempv2);
|
|
||||||
DIVVS(Global->cmphase[1], tempv1, Global->mtot+Local[ProcessId].mymtot);
|
|
||||||
Global->mtot +=Local[ProcessId].mymtot;
|
|
||||||
UNLOCK(Global->CountLock);
|
|
||||||
}
|
|
||||||
|
|
||||||
BARRIER(Global->Barrier,NPROC);
|
|
||||||
|
|
||||||
if (ProcessId==0) {
|
|
||||||
nttot = Global->n2bcalc + Global->nbccalc;
|
|
||||||
nbavg = (long) ((real) Global->n2bcalc / (real) nbody);
|
|
||||||
ncavg = (long) ((real) Global->nbccalc / (real) nbody);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* DIAGNOSTICS: compute set of dynamical diagnostics.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void diagnostics(long ProcessId)
|
|
||||||
{
|
|
||||||
register bodyptr p,*pp;
|
|
||||||
real velsq;
|
|
||||||
vector tmpv;
|
|
||||||
matrix tmpt;
|
|
||||||
|
|
||||||
Local[ProcessId].mymtot = 0.0;
|
|
||||||
Local[ProcessId].myetot[1] = Local[ProcessId].myetot[2] = 0.0;
|
|
||||||
CLRM(Local[ProcessId].myketen);
|
|
||||||
CLRM(Local[ProcessId].mypeten);
|
|
||||||
CLRV(Local[ProcessId].mycmphase[0]);
|
|
||||||
CLRV(Local[ProcessId].mycmphase[1]);
|
|
||||||
CLRV(Local[ProcessId].myamvec);
|
|
||||||
for (pp = Local[ProcessId].mybodytab+Local[ProcessId].mynbody -1;
|
|
||||||
pp >= Local[ProcessId].mybodytab; pp--) {
|
|
||||||
p= *pp;
|
|
||||||
Local[ProcessId].mymtot += Mass(p);
|
|
||||||
DOTVP(velsq, Vel(p), Vel(p));
|
|
||||||
Local[ProcessId].myetot[1] += 0.5 * Mass(p) * velsq;
|
|
||||||
Local[ProcessId].myetot[2] += 0.5 * Mass(p) * Phi(p);
|
|
||||||
MULVS(tmpv, Vel(p), 0.5 * Mass(p));
|
|
||||||
OUTVP(tmpt, tmpv, Vel(p));
|
|
||||||
ADDM(Local[ProcessId].myketen, Local[ProcessId].myketen, tmpt);
|
|
||||||
MULVS(tmpv, Pos(p), Mass(p));
|
|
||||||
OUTVP(tmpt, tmpv, Acc(p));
|
|
||||||
ADDM(Local[ProcessId].mypeten, Local[ProcessId].mypeten, tmpt);
|
|
||||||
MULVS(tmpv, Pos(p), Mass(p));
|
|
||||||
ADDV(Local[ProcessId].mycmphase[0], Local[ProcessId].mycmphase[0], tmpv);
|
|
||||||
MULVS(tmpv, Vel(p), Mass(p));
|
|
||||||
ADDV(Local[ProcessId].mycmphase[1], Local[ProcessId].mycmphase[1], tmpv);
|
|
||||||
CROSSVP(tmpv, Pos(p), Vel(p));
|
|
||||||
MULVS(tmpv, tmpv, Mass(p));
|
|
||||||
ADDV(Local[ProcessId].myamvec, Local[ProcessId].myamvec, tmpv);
|
|
||||||
}
|
|
||||||
Local[ProcessId].myetot[0] = Local[ProcessId].myetot[1]
|
|
||||||
+ Local[ProcessId].myetot[2];
|
|
||||||
if (Local[ProcessId].mymtot!=0){
|
|
||||||
DIVVS(Local[ProcessId].mycmphase[0], Local[ProcessId].mycmphase[0],
|
|
||||||
Local[ProcessId].mymtot);
|
|
||||||
DIVVS(Local[ProcessId].mycmphase[1], Local[ProcessId].mycmphase[1],
|
|
||||||
Local[ProcessId].mymtot);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Low-level input and output operations.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Read one long integer from str into *iptr; report a conversion failure. */
void in_int(stream str, long *iptr)
{
   int nread = fscanf(str, "%ld", iptr);

   if (nread != 1) {
      error("in_int: input conversion print_error\n");
   }
}
|
|
||||||
|
|
||||||
/*
 * Read one floating-point value from str into *rptr.  The value is
 * scanned as a double and then assigned to the real; a conversion
 * failure is reported through error().
 */
void in_real(stream str, real *rptr)
{
   double scanned;
   int nread = fscanf(str, "%lf", &scanned);

   if (nread != 1) {
      error("in_real: input conversion print_error\n");
   }
   *rptr = scanned;
}
|
|
||||||
|
|
||||||
/*
 * Read three floating-point components from str into vec[0..2].  The
 * values are scanned as doubles and then assigned; a conversion failure
 * is reported through error().
 */
void in_vector(stream str, vector vec)
{
   double comp[3];
   long k;

   if (fscanf(str, "%lf%lf%lf", &comp[0], &comp[1], &comp[2]) != 3) {
      error("in_vector: input conversion print_error\n");
   }
   for (k = 0; k < 3; k++) {
      vec[k] = comp[k];
   }
}
|
|
||||||
|
|
||||||
/* Write ival to str on its own line, preceded by two spaces. */
void out_int(stream str, long ival)
{
   static const char layout[] = "  %ld\n";

   fprintf(str, layout, ival);
}
|
|
||||||
|
|
||||||
/* Write rval to str on its own line in %21.14E notation. */
void out_real(stream str, real rval)
{
   static const char layout[] = " %21.14E\n";

   fprintf(str, layout, rval);
}
|
|
||||||
|
|
||||||
/* Write the three components vec[0..2] to str on one line, each in
   %21.14E notation. */
void out_vector(stream str, vector vec)
{
   fprintf(str, " %21.14E %21.14E %21.14E\n", vec[0], vec[1], vec[2]);
}
|
|
||||||
|
|
|
@ -1,15 +0,0 @@
|
||||||
#ifndef _CODE_IO_H_
|
|
||||||
#define _CODE_IO_H_
|
|
||||||
|
|
||||||
void inputdata(void);
|
|
||||||
void initoutput(void);
|
|
||||||
void output(long ProcessId);
|
|
||||||
void diagnostics(long ProcessId);
|
|
||||||
void in_int(stream str, long *iptr);
|
|
||||||
void in_real(stream str, real *rptr);
|
|
||||||
void in_vector(stream str, vector vec);
|
|
||||||
void out_int(stream str, long ival);
|
|
||||||
void out_real(stream str, real rval);
|
|
||||||
void out_vector(stream str, vector vec);
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,168 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#ifndef _DEFS_H_
|
|
||||||
#define _DEFS_H_
|
|
||||||
|
|
||||||
//#include <assert.h>
|
|
||||||
|
|
||||||
#define MAX_PROC 128
|
|
||||||
#define MAX_BODIES_PER_LEAF 10
|
|
||||||
#define MAXLOCK 2048 /* maximum number of locks on DASH */
|
|
||||||
#define PAGE_SIZE 4096 /* in bytes */
|
|
||||||
|
|
||||||
#define NSUB (1 << NDIM) /* subcells per cell */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* BODY and CELL data structures are used to represent the tree:
|
|
||||||
*
|
|
||||||
* +-----------------------------------------------------------+
|
|
||||||
* root--> | CELL: mass, pos, cost, quad, /, o, /, /, /, /, o, /, done |
|
|
||||||
* +---------------------------------|--------------|----------+
|
|
||||||
* | |
|
|
||||||
* +--------------------------------------+ |
|
|
||||||
* | |
|
|
||||||
* | +--------------------------------------+ |
|
|
||||||
* +--> | BODY: mass, pos, cost, vel, acc, phi | |
|
|
||||||
* +--------------------------------------+ |
|
|
||||||
* |
|
|
||||||
* +-----------------------------------------------------+
|
|
||||||
* |
|
|
||||||
* | +-----------------------------------------------------------+
|
|
||||||
* +--> | CELL: mass, pos, cost, quad, o, /, /, o, /, /, o, /, done |
|
|
||||||
* +------------------------------|--------|--------|----------+
|
|
||||||
* etc etc etc
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* NODE: data common to BODY and CELL structures.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef struct _node {
|
|
||||||
long type; /* code for node type: body or cell */
|
|
||||||
real mass; /* total mass of node */
|
|
||||||
vector pos; /* position of node */
|
|
||||||
long cost; /* number of interactions computed */
|
|
||||||
long level;
|
|
||||||
struct _node *parent; /* ptr to parent of this node in tree */
|
|
||||||
long child_num; /* Index that this node should be put
|
|
||||||
at in parent cell */
|
|
||||||
} node;
|
|
||||||
|
|
||||||
typedef node* nodeptr;
|
|
||||||
|
|
||||||
#define Type(x) (((nodeptr) (x))->type)
|
|
||||||
#define Mass(x) (((nodeptr) (x))->mass)
|
|
||||||
#define Pos(x) (((nodeptr) (x))->pos)
|
|
||||||
#define Cost(x) (((nodeptr) (x))->cost)
|
|
||||||
#define Level(x) (((nodeptr) (x))->level)
|
|
||||||
#define Parent(x) (((nodeptr) (x))->parent)
|
|
||||||
#define ChildNum(x) (((nodeptr) (x))->child_num)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* BODY: data structure used to represent particles.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef struct _body* bodyptr;
|
|
||||||
typedef struct _leaf* leafptr;
|
|
||||||
typedef struct _cell* cellptr;
|
|
||||||
|
|
||||||
#define BODY 01 /* type code for bodies */
|
|
||||||
|
|
||||||
typedef struct _body {
|
|
||||||
long type;
|
|
||||||
real mass; /* mass of body */
|
|
||||||
vector pos; /* position of body */
|
|
||||||
long cost; /* number of interactions computed */
|
|
||||||
long level;
|
|
||||||
leafptr parent;
|
|
||||||
long child_num; /* Index that this node should be put */
|
|
||||||
vector vel; /* velocity of body */
|
|
||||||
vector acc; /* acceleration of body */
|
|
||||||
real phi; /* potential at body */
|
|
||||||
} body;
|
|
||||||
|
|
||||||
#define Vel(x) (((bodyptr) (x))->vel)
|
|
||||||
#define Acc(x) (((bodyptr) (x))->acc)
|
|
||||||
#define Phi(x) (((bodyptr) (x))->phi)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* CELL: structure used to represent internal nodes of tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define CELL 02 /* type code for cells */
|
|
||||||
|
|
||||||
typedef struct _cell {
|
|
||||||
long type;
|
|
||||||
real mass; /* total mass of cell */
|
|
||||||
vector pos; /* cm. position of cell */
|
|
||||||
long cost; /* number of interactions computed */
|
|
||||||
long level;
|
|
||||||
cellptr parent;
|
|
||||||
long child_num; /* Index [0..8] that this node should be put */
|
|
||||||
long processor; /* Used by partition code */
|
|
||||||
struct _cell *next, *prev; /* Used in the partition array */
|
|
||||||
long seqnum;
|
|
||||||
#ifdef QUADPOLE
|
|
||||||
matrix quad; /* quad. moment of cell */
|
|
||||||
#endif
|
|
||||||
volatile long done; /* flag to tell when the c.of.m is ready */
|
|
||||||
nodeptr subp[NSUB]; /* descendents of cell */
|
|
||||||
} cell;
|
|
||||||
|
|
||||||
#define Subp(x) (((cellptr) (x))->subp)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* LEAF: structure used to represent leaf nodes of tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define LEAF 03 /* type code for leaves */
|
|
||||||
|
|
||||||
typedef struct _leaf {
|
|
||||||
long type;
|
|
||||||
real mass; /* total mass of leaf */
|
|
||||||
vector pos; /* cm. position of leaf */
|
|
||||||
long cost; /* number of interactions computed */
|
|
||||||
long level;
|
|
||||||
cellptr parent;
|
|
||||||
long child_num; /* Index [0..8] that this node should be put */
|
|
||||||
long processor; /* Used by partition code */
|
|
||||||
struct _leaf *next, *prev; /* Used in the partition array */
|
|
||||||
long seqnum;
|
|
||||||
#ifdef QUADPOLE
|
|
||||||
matrix quad; /* quad. moment of leaf */
|
|
||||||
#endif
|
|
||||||
volatile long done; /* flag to tell when the c.of.m is ready */
|
|
||||||
long num_bodies;
|
|
||||||
bodyptr bodyp[MAX_BODIES_PER_LEAF]; /* bodies of leaf */
|
|
||||||
} leaf;
|
|
||||||
|
|
||||||
#define Bodyp(x) (((leafptr) (x))->bodyp)
|
|
||||||
|
|
||||||
#ifdef QUADPOLE
|
|
||||||
#define Quad(x) (((cellptr) (x))->quad)
|
|
||||||
#endif
|
|
||||||
#define Done(x) (((cellptr) (x))->done)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Integerized coordinates: used to maintain body-tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define MAXLEVEL ((8L * (long)sizeof(long)) - 2L)
|
|
||||||
#define IMAX (1L << MAXLEVEL) /* highest bit of int coord */
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,157 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* GETPARAM.C:
|
|
||||||
*/
|
|
||||||
EXTERN_ENV
|
|
||||||
#define global extern
|
|
||||||
|
|
||||||
#include "stdinc.h"
|
|
||||||
|
|
||||||
local string *defaults = NULL; /* vector of "name=value" strings */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* INITPARAM: ignore arg vector, remember defaults.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void initparam(string *defv)
|
|
||||||
{
|
|
||||||
defaults = defv;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* GETPARAM: export version prompts user for value.
|
|
||||||
*/
|
|
||||||
|
|
||||||
string getparam(string name)
|
|
||||||
{
|
|
||||||
long i, leng;
|
|
||||||
string def;
|
|
||||||
char buf[128];
|
|
||||||
|
|
||||||
if (defaults == NULL)
|
|
||||||
error("getparam: called before initparam\n");
|
|
||||||
i = scanbind(defaults, name);
|
|
||||||
if (i < 0)
|
|
||||||
error("getparam: %s unknown\n", name);
|
|
||||||
def = extrvalue(defaults[i]);
|
|
||||||
gets(buf);
|
|
||||||
leng = strlen(buf) + 1;
|
|
||||||
if (leng > 1) {
|
|
||||||
return (strcpy(malloc(leng), buf));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return (def);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* GETIPARAM, ..., GETDPARAM: get long, long, bool, or double parameters.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
 * Fetch parameter `name` as a long.  getparam() is called repeatedly
 * until it yields a non-empty string.  The text is converted with
 * strtol(), so the full range of long is available (atoi() narrowed the
 * value through int and is undefined on overflow).
 */
long getiparam(string name)
{
   string val;

   for (val = ""; *val == '\0';) {
      val = getparam(name);
   }
   return (strtol(val, NULL, 10));
}
|
|
||||||
|
|
||||||
/*
 * Fetch parameter `name` as a long: keep asking getparam() until it
 * yields a non-empty string, then convert it with atol().
 */
long getlparam(string name)
{
   string text;

   do {
      text = getparam(name);
   } while (*text == '\0');
   return (atol(text));
}
|
|
||||||
|
|
||||||
/*
 * Fetch parameter `name` as a boolean.  getparam() is called repeatedly
 * until it yields a non-empty string; only its first character is
 * examined: one of "tTyY1" gives TRUE, one of "fFnN0" gives FALSE, and
 * anything else is reported through error().
 */
bool getbparam(string name)
{
   string val;

   for (val = ""; *val == '\0'; )
      val = getparam(name);
   /* *val is never '\0' here, so strchr cannot match the terminator */
   if (strchr("tTyY1", *val) != NULL) {
      return (TRUE);
   }
   if (strchr("fFnN0", *val) != NULL) {
      return (FALSE);
   }
   error("getbparam: %s=%s not bool\n", name, val);
   /* only reached if error() returns: give a defined result instead of
      falling off the end of a non-void function */
   return (FALSE);
}
|
|
||||||
|
|
||||||
/*
 * Fetch parameter `name` as a double: keep asking getparam() until it
 * yields a non-empty string, then convert it with atof().
 */
double getdparam(string name)
{
   string text;

   do {
      text = getparam(name);
   } while (*text == '\0');
   return (atof(text));
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SCANBIND: scan binding vector for name, return index.
|
|
||||||
*/
|
|
||||||
|
|
||||||
long scanbind(string bvec[], string name)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
|
|
||||||
for (i = 0; bvec[i] != NULL; i++)
|
|
||||||
if (matchname(bvec[i], name))
|
|
||||||
return (i);
|
|
||||||
return (-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* MATCHNAME: determine if "name=value" matches "name".
|
|
||||||
*/
|
|
||||||
|
|
||||||
bool matchname(string bind, string name)
|
|
||||||
{
|
|
||||||
char *bp, *np;
|
|
||||||
|
|
||||||
bp = bind;
|
|
||||||
np = name;
|
|
||||||
while (*bp == *np) {
|
|
||||||
bp++;
|
|
||||||
np++;
|
|
||||||
}
|
|
||||||
return (*bp == '=' && *np == '\0');
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* EXTRVALUE: extract value from name=value string.
|
|
||||||
*/
|
|
||||||
|
|
||||||
string extrvalue(string arg)
|
|
||||||
{
|
|
||||||
char *ap;
|
|
||||||
|
|
||||||
ap = (char *) arg;
|
|
||||||
while (*ap != '\0')
|
|
||||||
if (*ap++ == '=')
|
|
||||||
return ((string) ap);
|
|
||||||
return (NULL);
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,14 +0,0 @@
|
||||||
#ifndef _GETPARAM_H_
|
|
||||||
#define _GETPARAM_H_
|
|
||||||
|
|
||||||
void initparam(string *defv);
|
|
||||||
string getparam(string name);
|
|
||||||
long getiparam(string name);
|
|
||||||
long getlparam(string name);
|
|
||||||
bool getbparam(string name);
|
|
||||||
double getdparam(string name);
|
|
||||||
long scanbind(string bvec[], string name);
|
|
||||||
bool matchname(string bind, string name);
|
|
||||||
string extrvalue(string arg);
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,150 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* GRAV.C:
|
|
||||||
*/
|
|
||||||
|
|
||||||
EXTERN_ENV
|
|
||||||
#define global extern
|
|
||||||
|
|
||||||
#include "stdinc.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* HACKGRAV: evaluate grav field at a given particle.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void hackgrav(bodyptr p, long ProcessId)
|
|
||||||
{
|
|
||||||
Local[ProcessId].pskip = p;
|
|
||||||
SETV(Local[ProcessId].pos0, Pos(p));
|
|
||||||
Local[ProcessId].phi0 = 0.0;
|
|
||||||
CLRV(Local[ProcessId].acc0);
|
|
||||||
Local[ProcessId].myn2bterm = 0;
|
|
||||||
Local[ProcessId].mynbcterm = 0;
|
|
||||||
Local[ProcessId].skipself = FALSE;
|
|
||||||
hackwalk(ProcessId);
|
|
||||||
Phi(p) = Local[ProcessId].phi0;
|
|
||||||
SETV(Acc(p), Local[ProcessId].acc0);
|
|
||||||
#ifdef QUADPOLE
|
|
||||||
Cost(p) = Local[ProcessId].myn2bterm + NDIM * Local[ProcessId].mynbcterm;
|
|
||||||
#else
|
|
||||||
Cost(p) = Local[ProcessId].myn2bterm + Local[ProcessId].mynbcterm;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* GRAVSUB: compute a single body-body or body-cell longeraction.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void gravsub(register nodeptr p, long ProcessId)
|
|
||||||
{
|
|
||||||
real drabs, phii, mor3;
|
|
||||||
vector ai;
|
|
||||||
|
|
||||||
if (p != Local[ProcessId].pmem) {
|
|
||||||
SUBV(Local[ProcessId].dr, Pos(p), Local[ProcessId].pos0);
|
|
||||||
DOTVP(Local[ProcessId].drsq, Local[ProcessId].dr, Local[ProcessId].dr);
|
|
||||||
}
|
|
||||||
|
|
||||||
Local[ProcessId].drsq += epssq;
|
|
||||||
drabs = sqrt((double) Local[ProcessId].drsq);
|
|
||||||
phii = Mass(p) / drabs;
|
|
||||||
Local[ProcessId].phi0 -= phii;
|
|
||||||
mor3 = phii / Local[ProcessId].drsq;
|
|
||||||
MULVS(ai, Local[ProcessId].dr, mor3);
|
|
||||||
ADDV(Local[ProcessId].acc0, Local[ProcessId].acc0, ai);
|
|
||||||
if(Type(p) != BODY) { /* a body-cell/leaf interaction? */
|
|
||||||
Local[ProcessId].mynbcterm++;
|
|
||||||
#ifdef QUADPOLE
|
|
||||||
dr5inv = 1.0/(Local[ProcessId].drsq * Local[ProcessId].drsq * drabs);
|
|
||||||
MULMV(quaddr, Quad(p), Local[ProcessId].dr);
|
|
||||||
DOTVP(drquaddr, Local[ProcessId].dr, quaddr);
|
|
||||||
phiquad = -0.5 * dr5inv * drquaddr;
|
|
||||||
Local[ProcessId].phi0 += phiquad;
|
|
||||||
phiquad = 5.0 * phiquad / Local[ProcessId].drsq;
|
|
||||||
MULVS(ai, Local[ProcessId].dr, phiquad);
|
|
||||||
SUBV(Local[ProcessId].acc0, Local[ProcessId].acc0, ai);
|
|
||||||
MULVS(quaddr, quaddr, dr5inv);
|
|
||||||
SUBV(Local[ProcessId].acc0, Local[ProcessId].acc0, quaddr);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
else { /* a body-body interaction */
|
|
||||||
Local[ProcessId].myn2bterm++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* HACKWALK: walk the tree opening cells too close to a given point.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void hackwalk(long ProcessId)
|
|
||||||
{
|
|
||||||
walksub(Global->G_root, Global->rsize * Global->rsize, ProcessId);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* WALKSUB: recursive routine to do hackwalk operation.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void walksub(nodeptr n, real dsq, long ProcessId)
|
|
||||||
{
|
|
||||||
nodeptr* nn;
|
|
||||||
leafptr l;
|
|
||||||
bodyptr p;
|
|
||||||
long i;
|
|
||||||
|
|
||||||
if (subdivp(n, dsq, ProcessId)) {
|
|
||||||
if (Type(n) == CELL) {
|
|
||||||
for (nn = Subp(n); nn < Subp(n) + NSUB; nn++) {
|
|
||||||
if (*nn != NULL) {
|
|
||||||
walksub(*nn, dsq / 4.0, ProcessId);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
l = (leafptr) n;
|
|
||||||
for (i = 0; i < l->num_bodies; i++) {
|
|
||||||
p = Bodyp(l)[i];
|
|
||||||
if (p != Local[ProcessId].pskip) {
|
|
||||||
gravsub(p, ProcessId);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
Local[ProcessId].skipself = TRUE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
gravsub(n, ProcessId);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SUBDIVP: decide if a node should be opened.
|
|
||||||
* Side effects: sets pmem,dr, and drsq.
|
|
||||||
*/
|
|
||||||
|
|
||||||
bool subdivp(register nodeptr p, real dsq, long ProcessId)
|
|
||||||
{
|
|
||||||
SUBV(Local[ProcessId].dr, Pos(p), Local[ProcessId].pos0);
|
|
||||||
DOTVP(Local[ProcessId].drsq, Local[ProcessId].dr, Local[ProcessId].dr);
|
|
||||||
Local[ProcessId].pmem = p;
|
|
||||||
return (tolsq * Local[ProcessId].drsq < dsq);
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,10 +0,0 @@
|
||||||
#ifndef _GRAV_H_
|
|
||||||
#define _GRAV_H_
|
|
||||||
|
|
||||||
void hackgrav(bodyptr p, long ProcessId);
|
|
||||||
void gravsub(register nodeptr p, long ProcessId);
|
|
||||||
void hackwalk(long ProcessId);
|
|
||||||
void walksub(nodeptr n, real dsq, long ProcessId);
|
|
||||||
bool subdivp(register nodeptr p, real dsq, long ProcessId);
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,12 +0,0 @@
|
||||||
|
|
||||||
16384
|
|
||||||
123
|
|
||||||
|
|
||||||
0.025
|
|
||||||
0.05
|
|
||||||
1.0
|
|
||||||
2.0
|
|
||||||
5.0
|
|
||||||
0.075
|
|
||||||
0.25
|
|
||||||
1
|
|
Binary file not shown.
|
@ -1,517 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
EXTERN_ENV
|
|
||||||
#define global extern
|
|
||||||
|
|
||||||
#include "stdinc.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* MAKETREE: initialize tree structure for hack force calculation.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void maketree(long ProcessId)
|
|
||||||
{
|
|
||||||
bodyptr p, *pp;
|
|
||||||
|
|
||||||
Local[ProcessId].myncell = 0;
|
|
||||||
Local[ProcessId].mynleaf = 0;
|
|
||||||
if (ProcessId == 0) {
|
|
||||||
Local[ProcessId].mycelltab[Local[ProcessId].myncell++] = Global->G_root;
|
|
||||||
}
|
|
||||||
Local[ProcessId].Current_Root = (nodeptr) Global->G_root;
|
|
||||||
for (pp = Local[ProcessId].mybodytab;
|
|
||||||
pp < Local[ProcessId].mybodytab+Local[ProcessId].mynbody; pp++) {
|
|
||||||
p = *pp;
|
|
||||||
if (Mass(p) != 0.0) {
|
|
||||||
Local[ProcessId].Current_Root
|
|
||||||
= (nodeptr) loadtree(p, (cellptr) Local[ProcessId].Current_Root,
|
|
||||||
ProcessId);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
LOCK(Global->io_lock);
|
|
||||||
fprintf(stderr, "Process %ld found body %ld to have zero mass\n",
|
|
||||||
ProcessId, (long) p);
|
|
||||||
UNLOCK(Global->io_lock);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
BARRIER(Global->Barrier,NPROC);
|
|
||||||
hackcofm(ProcessId );
|
|
||||||
BARRIER(Global->Barrier,NPROC);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Obtain a cell from makecell() and initialise its bookkeeping fields:
 * owning processor, cleared next/prev links, parent pointer and child
 * number 0.  The level is IMAX >> 1 when parent is NULL, otherwise half
 * the parent's level.
 */
cellptr InitCell(cellptr parent, long ProcessId)
{
   cellptr fresh = makecell(ProcessId);

   fresh->processor = ProcessId;
   fresh->prev = NULL;
   fresh->next = NULL;
   Level(fresh) = (parent == NULL) ? (IMAX >> 1) : (Level(parent) >> 1);
   Parent(fresh) = (nodeptr) parent;
   ChildNum(fresh) = 0;
   return (fresh);
}
|
|
||||||
|
|
||||||
/*
 * Obtain a leaf from makeleaf() and initialise its bookkeeping fields:
 * owning processor, cleared next/prev links, parent pointer and child
 * number 0.  The level is IMAX >> 1 when parent is NULL, otherwise half
 * the parent's level.
 */
leafptr InitLeaf(cellptr parent, long ProcessId)
{
   leafptr fresh = makeleaf(ProcessId);

   fresh->processor = ProcessId;
   fresh->prev = NULL;
   fresh->next = NULL;
   Level(fresh) = (parent == NULL) ? (IMAX >> 1) : (Level(parent) >> 1);
   Parent(fresh) = (nodeptr) parent;
   ChildNum(fresh) = 0;
   return (fresh);
}
|
|
||||||
|
|
||||||
/*
 * Print the subtree rooted at n to stdout.
 *
 * For a cell: its cost and position, one summary line per child slot,
 * then each non-NULL child recursively.  For a leaf: its body count, cost
 * and position, followed by one line per body.  Any other node type
 * prints "Bad type" on stderr and exits with status -1.
 */
void printtree(nodeptr n)
{
   long k;
   cellptr c;
   leafptr l;
   bodyptr p;
   nodeptr tmp;

   switch (Type(n)) {
   case CELL:
      c = (cellptr) n;
      printf("Cell : Cost = %ld, ", Cost(c));
      PRTV("Pos", Pos(n));
      printf("\n");
      for (k = 0; k < NSUB; k++) {
         printf("Child #%ld: ", k);
         if (Subp(c)[k] == NULL) {
            printf("NONE");
         }
         else {
            if (Type(Subp(c)[k]) == CELL) {
               printf("C: Cost = %ld, ", Cost(Subp(c)[k]));
            }
            else {
               printf("L: # Bodies = %2ld, Cost = %ld, ",
                      ((leafptr) Subp(c)[k])->num_bodies, Cost(Subp(c)[k]));
            }
            tmp = Subp(c)[k];
            PRTV("Pos", Pos(tmp));
         }
         printf("\n");
      }
      for (k=0;k<NSUB;k++) {
         if (Subp(c)[k] != NULL) {
            printtree(Subp(c)[k]);
         }
      }
      break;
   case LEAF:
      l = (leafptr) n;
      printf("Leaf : # Bodies = %2ld, Cost = %ld, ", l->num_bodies, Cost(l));
      PRTV("Pos", Pos(n));
      printf("\n");
      for (k = 0; k < l->num_bodies; k++) {
         p = Bodyp(l)[k];
         /* a pointer difference has type ptrdiff_t; cast it to match %ld */
         printf("Body #%2ld: Num = %2ld, Level = %ld, ",
                (long) (p - bodytab), k, Level(p));
         PRTV("Pos",Pos(p));
         printf("\n");
      }
      break;
   default:
      fprintf(stderr, "Bad type\n");
      exit(-1);
      break;
   }
   fflush(stdout);
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* LOADTREE: descend tree and insert particle.
|
|
||||||
*/
|
|
||||||
|
|
||||||
nodeptr loadtree(bodyptr p, cellptr root, long ProcessId)
|
|
||||||
{
|
|
||||||
long l, xp[NDIM], xor[NDIM], flag;
|
|
||||||
long i, j, root_level;
|
|
||||||
bool valid_root;
|
|
||||||
long kidIndex;
|
|
||||||
volatile nodeptr *volatile qptr, mynode;
|
|
||||||
leafptr le;
|
|
||||||
|
|
||||||
intcoord(xp, Pos(p));
|
|
||||||
valid_root = TRUE;
|
|
||||||
for (i = 0; i < NDIM; i++) {
|
|
||||||
xor[i] = xp[i] ^ Local[ProcessId].Root_Coords[i];
|
|
||||||
}
|
|
||||||
for (i = IMAX >> 1; i > Level(root); i >>= 1) {
|
|
||||||
for (j = 0; j < NDIM; j++) {
|
|
||||||
if (xor[j] & i) {
|
|
||||||
valid_root = FALSE;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!valid_root) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!valid_root) {
|
|
||||||
if (root != Global->G_root) {
|
|
||||||
root_level = Level(root);
|
|
||||||
for (j = i; j > root_level; j >>= 1) {
|
|
||||||
root = (cellptr) Parent(root);
|
|
||||||
}
|
|
||||||
valid_root = TRUE;
|
|
||||||
for (i = IMAX >> 1; i > Level(root); i >>= 1) {
|
|
||||||
for (j = 0; j < NDIM; j++) {
|
|
||||||
if (xor[j] & i) {
|
|
||||||
valid_root = FALSE;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!valid_root) {
|
|
||||||
printf("P%ld body %ld\n", ProcessId, p - bodytab);
|
|
||||||
root = Global->G_root;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
root = Global->G_root;
|
|
||||||
mynode = (nodeptr) root;
|
|
||||||
kidIndex = subindex(xp, Level(mynode));
|
|
||||||
qptr = &Subp(mynode)[kidIndex];
|
|
||||||
|
|
||||||
l = Level(mynode) >> 1;
|
|
||||||
flag = TRUE;
|
|
||||||
while (flag) { /* loop descending tree */
|
|
||||||
if (l == 0) {
|
|
||||||
error("not enough levels in tree\n");
|
|
||||||
}
|
|
||||||
if (*qptr == NULL) {
|
|
||||||
/* lock the parent cell */
|
|
||||||
ALOCK(CellLock->CL, ((cellptr) mynode)->seqnum % MAXLOCK);
|
|
||||||
if (*qptr == NULL) {
|
|
||||||
le = InitLeaf((cellptr) mynode, ProcessId);
|
|
||||||
Parent(p) = (nodeptr) le;
|
|
||||||
Level(p) = l;
|
|
||||||
ChildNum(p) = le->num_bodies;
|
|
||||||
ChildNum(le) = kidIndex;
|
|
||||||
Bodyp(le)[le->num_bodies++] = p;
|
|
||||||
*qptr = (nodeptr) le;
|
|
||||||
flag = FALSE;
|
|
||||||
}
|
|
||||||
AULOCK(CellLock->CL, ((cellptr) mynode)->seqnum % MAXLOCK);
|
|
||||||
/* unlock the parent cell */
|
|
||||||
}
|
|
||||||
if (flag && *qptr && (Type(*qptr) == LEAF)) {
|
|
||||||
/* reached a "leaf"? */
|
|
||||||
ALOCK(CellLock->CL, ((cellptr) mynode)->seqnum % MAXLOCK);
|
|
||||||
/* lock the parent cell */
|
|
||||||
if (Type(*qptr) == LEAF) { /* still a "leaf"? */
|
|
||||||
le = (leafptr) *qptr;
|
|
||||||
if (le->num_bodies == MAX_BODIES_PER_LEAF) {
|
|
||||||
*qptr = (nodeptr) SubdivideLeaf(le, (cellptr) mynode, l,
|
|
||||||
ProcessId);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
Parent(p) = (nodeptr) le;
|
|
||||||
Level(p) = l;
|
|
||||||
ChildNum(p) = le->num_bodies;
|
|
||||||
Bodyp(le)[le->num_bodies++] = p;
|
|
||||||
flag = FALSE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
AULOCK(CellLock->CL, ((cellptr) mynode)->seqnum % MAXLOCK);
|
|
||||||
/* unlock the node */
|
|
||||||
}
|
|
||||||
if (flag) {
|
|
||||||
mynode = *qptr;
|
|
||||||
kidIndex = subindex(xp, l);
|
|
||||||
qptr = &Subp(*qptr)[kidIndex]; /* move down one level */
|
|
||||||
l = l >> 1; /* and test next bit */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
SETV(Local[ProcessId].Root_Coords, xp);
|
|
||||||
return Parent((leafptr) *qptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* * INTCOORD: compute integerized coordinates. * Returns: TRUE
|
|
||||||
unless rp was out of bounds. */
|
|
||||||
|
|
||||||
/* integerized coordinate vector [0,IMAX) */
|
|
||||||
/* real coordinate vector (system coords) */
|
|
||||||
bool intcoord(long xp[NDIM], vector rp)
|
|
||||||
{
|
|
||||||
long k;
|
|
||||||
bool inb;
|
|
||||||
double xsc;
|
|
||||||
|
|
||||||
inb = TRUE;
|
|
||||||
for (k = 0; k < NDIM; k++) {
|
|
||||||
xsc = (rp[k] - Global->rmin[k]) / Global->rsize;
|
|
||||||
if (0.0 <= xsc && xsc < 1.0) {
|
|
||||||
xp[k] = floor(IMAX * xsc);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
inb = FALSE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return (inb);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SUBINDEX: determine which subcell to select.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* integerized coordinates of particle */
|
|
||||||
/* current level of tree */
|
|
||||||
long subindex(long x[NDIM], long l)
|
|
||||||
{
|
|
||||||
long i, k;
|
|
||||||
long yes;
|
|
||||||
|
|
||||||
i = 0;
|
|
||||||
yes = FALSE;
|
|
||||||
if (x[0] & l) {
|
|
||||||
i += NSUB >> 1;
|
|
||||||
yes = TRUE;
|
|
||||||
}
|
|
||||||
for (k = 1; k < NDIM; k++) {
|
|
||||||
if (((x[k] & l) && !yes) || (!(x[k] & l) && yes)) {
|
|
||||||
i += NSUB >> (k + 1);
|
|
||||||
yes = TRUE;
|
|
||||||
}
|
|
||||||
else yes = FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (i);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* HACKCOFM: descend tree finding center-of-mass coordinates.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void hackcofm(long ProcessId)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
nodeptr r;
|
|
||||||
leafptr l;
|
|
||||||
leafptr* ll;
|
|
||||||
bodyptr p;
|
|
||||||
cellptr q;
|
|
||||||
cellptr *cc;
|
|
||||||
vector tmpv;
|
|
||||||
|
|
||||||
/* get a cell using get*sub. Cells are got in reverse of the order in */
|
|
||||||
/* the cell array; i.e. reverse of the order in which they were created */
|
|
||||||
/* this way, we look at child cells before parents */
|
|
||||||
|
|
||||||
for (ll = Local[ProcessId].myleaftab + Local[ProcessId].mynleaf - 1;
|
|
||||||
ll >= Local[ProcessId].myleaftab; ll--) {
|
|
||||||
l = *ll;
|
|
||||||
Mass(l) = 0.0;
|
|
||||||
Cost(l) = 0;
|
|
||||||
CLRV(Pos(l));
|
|
||||||
for (i = 0; i < l->num_bodies; i++) {
|
|
||||||
p = Bodyp(l)[i];
|
|
||||||
Mass(l) += Mass(p);
|
|
||||||
Cost(l) += Cost(p);
|
|
||||||
MULVS(tmpv, Pos(p), Mass(p));
|
|
||||||
ADDV(Pos(l), Pos(l), tmpv);
|
|
||||||
}
|
|
||||||
DIVVS(Pos(l), Pos(l), Mass(l));
|
|
||||||
#ifdef QUADPOLE
|
|
||||||
CLRM(Quad(l));
|
|
||||||
for (i = 0; i < l->num_bodies; i++) {
|
|
||||||
p = Bodyp(l)[i];
|
|
||||||
SUBV(dr, Pos(p), Pos(l));
|
|
||||||
OUTVP(drdr, dr, dr);
|
|
||||||
DOTVP(drsq, dr, dr);
|
|
||||||
SETMI(Idrsq);
|
|
||||||
MULMS(Idrsq, Idrsq, drsq);
|
|
||||||
MULMS(tmpm, drdr, 3.0);
|
|
||||||
SUBM(tmpm, tmpm, Idrsq);
|
|
||||||
MULMS(tmpm, tmpm, Mass(p));
|
|
||||||
ADDM(Quad(l), Quad(l), tmpm);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
Done(l)=TRUE;
|
|
||||||
}
|
|
||||||
for (cc = Local[ProcessId].mycelltab+Local[ProcessId].myncell-1;
|
|
||||||
cc >= Local[ProcessId].mycelltab; cc--) {
|
|
||||||
q = *cc;
|
|
||||||
Mass(q) = 0.0;
|
|
||||||
Cost(q) = 0;
|
|
||||||
CLRV(Pos(q));
|
|
||||||
for (i = 0; i < NSUB; i++) {
|
|
||||||
r = Subp(q)[i];
|
|
||||||
if (r != NULL) {
|
|
||||||
while(!Done(r)) {
|
|
||||||
/* wait */
|
|
||||||
}
|
|
||||||
Mass(q) += Mass(r);
|
|
||||||
Cost(q) += Cost(r);
|
|
||||||
MULVS(tmpv, Pos(r), Mass(r));
|
|
||||||
ADDV(Pos(q), Pos(q), tmpv);
|
|
||||||
Done(r) = FALSE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
DIVVS(Pos(q), Pos(q), Mass(q));
|
|
||||||
#ifdef QUADPOLE
|
|
||||||
CLRM(Quad(q));
|
|
||||||
for (i = 0; i < NSUB; i++) {
|
|
||||||
r = Subp(q)[i];
|
|
||||||
if (r != NULL) {
|
|
||||||
SUBV(dr, Pos(r), Pos(q));
|
|
||||||
OUTVP(drdr, dr, dr);
|
|
||||||
DOTVP(drsq, dr, dr);
|
|
||||||
SETMI(Idrsq);
|
|
||||||
MULMS(Idrsq, Idrsq, drsq);
|
|
||||||
MULMS(tmpm, drdr, 3.0);
|
|
||||||
SUBM(tmpm, tmpm, Idrsq);
|
|
||||||
MULMS(tmpm, tmpm, Mass(r));
|
|
||||||
ADDM(tmpm, tmpm, Quad(r));
|
|
||||||
ADDM(Quad(q), Quad(q), tmpm);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
Done(q)=TRUE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * SUBDIVIDELEAF: replace a leaf by a new cell and redistribute its bodies.
 *
 * le        - leaf to split; it is emptied and reused as the child that
 *             receives the first body.
 * parent    - cell that will own the new cell.
 * l         - level bit used to pick the subcell of each body.
 * ProcessId - index of the calling process, passed on to the allocators.
 *
 * Returns the new cell.  Storing it in the parent slot is left to the
 * caller.
 */
cellptr SubdivideLeaf(leafptr le, cellptr parent, long l, long ProcessId)
{
   bodyptr saved[MAX_BODIES_PER_LEAF];
   long xp[NDIM];
   long count, n, slot;
   cellptr cell;
   leafptr dest;
   bodyptr body;

   /* move the bodies aside and clear the leaf so that it can be reused */
   count = le->num_bodies;
   for (n = 0; n < count; n++) {
      saved[n] = Bodyp(le)[n];
      Bodyp(le)[n] = NULL;
   }
   le->num_bodies = 0;

   /* the new cell takes over the child slot of the leaf */
   cell = InitCell(parent, ProcessId);
   ChildNum(cell) = ChildNum(le);

   /* the first body goes back into the old leaf, now a child of the cell */
   body = saved[0];
   intcoord(xp, Pos(body));
   slot = subindex(xp, l);
   Subp(cell)[slot] = (nodeptr) le;
   ChildNum(le) = slot;
   Parent(le) = (nodeptr) cell;
   Level(le) = l >> 1;
   Parent(body) = (nodeptr) le;
   ChildNum(body) = le->num_bodies;
   Level(body) = l >> 1;
   Bodyp(le)[le->num_bodies++] = body;

   /* remaining bodies: reuse the leaf of their subcell or create one */
   for (n = 1; n < count; n++) {
      body = saved[n];
      intcoord(xp, Pos(body));
      slot = subindex(xp, l);
      if (Subp(cell)[slot]) {
         dest = (leafptr) Subp(cell)[slot];
      }
      else {
         dest = InitLeaf(cell, ProcessId);
         ChildNum(dest) = slot;
         Subp(cell)[slot] = (nodeptr) dest;
      }
      Parent(body) = (nodeptr) dest;
      ChildNum(body) = dest->num_bodies;
      Level(body) = l >> 1;
      Bodyp(dest)[dest->num_bodies++] = body;
   }
   return cell;
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* MAKECELL: allocation routine for cells.
|
|
||||||
*/
|
|
||||||
|
|
||||||
cellptr makecell(long ProcessId)
|
|
||||||
{
|
|
||||||
cellptr c;
|
|
||||||
long i, Mycell;
|
|
||||||
|
|
||||||
if (Local[ProcessId].mynumcell == maxmycell) {
|
|
||||||
error("makecell: Proc %ld needs more than %ld cells; increase fcells\n",
|
|
||||||
ProcessId,maxmycell);
|
|
||||||
}
|
|
||||||
Mycell = Local[ProcessId].mynumcell++;
|
|
||||||
c = Local[ProcessId].ctab + Mycell;
|
|
||||||
c->seqnum = ProcessId*maxmycell+Mycell;
|
|
||||||
Type(c) = CELL;
|
|
||||||
Done(c) = FALSE;
|
|
||||||
Mass(c) = 0.0;
|
|
||||||
for (i = 0; i < NSUB; i++) {
|
|
||||||
Subp(c)[i] = NULL;
|
|
||||||
}
|
|
||||||
Local[ProcessId].mycelltab[Local[ProcessId].myncell++] = c;
|
|
||||||
return (c);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* MAKELEAF: allocation routine for leaves.
|
|
||||||
*/
|
|
||||||
|
|
||||||
leafptr makeleaf(long ProcessId)
|
|
||||||
{
|
|
||||||
leafptr le;
|
|
||||||
long i, Myleaf;
|
|
||||||
|
|
||||||
if (Local[ProcessId].mynumleaf == maxmyleaf) {
|
|
||||||
error("makeleaf: Proc %ld needs more than %ld leaves; increase fleaves\n",
|
|
||||||
ProcessId,maxmyleaf);
|
|
||||||
}
|
|
||||||
Myleaf = Local[ProcessId].mynumleaf++;
|
|
||||||
le = Local[ProcessId].ltab + Myleaf;
|
|
||||||
le->seqnum = ProcessId * maxmyleaf + Myleaf;
|
|
||||||
Type(le) = LEAF;
|
|
||||||
Done(le) = FALSE;
|
|
||||||
Mass(le) = 0.0;
|
|
||||||
le->num_bodies = 0;
|
|
||||||
for (i = 0; i < MAX_BODIES_PER_LEAF; i++) {
|
|
||||||
Bodyp(le)[i] = NULL;
|
|
||||||
}
|
|
||||||
Local[ProcessId].myleaftab[Local[ProcessId].mynleaf++] = le;
|
|
||||||
return (le);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -1,17 +0,0 @@
|
||||||
#ifndef _LOAD_H_
|
|
||||||
#define _LOAD_H_
|
|
||||||
|
|
||||||
void maketree(long ProcessId);
|
|
||||||
cellptr InitCell(cellptr parent, long ProcessId);
|
|
||||||
leafptr InitLeaf(cellptr parent, long ProcessId);
|
|
||||||
void printtree(nodeptr n);
|
|
||||||
nodeptr loadtree(bodyptr p, cellptr root, long ProcessId);
|
|
||||||
bool intcoord(long xp[NDIM], vector rp);
|
|
||||||
long subindex(long x[NDIM], long l);
|
|
||||||
void hackcofm(long ProcessId);
|
|
||||||
cellptr SubdivideLeaf(leafptr le, cellptr parent, long l, long ProcessId);
|
|
||||||
cellptr makecell(long ProcessId);
|
|
||||||
leafptr makeleaf(long ProcessId);
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
|
Binary file not shown.
|
@ -1,119 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* STDINC.H: standard include file for C programs.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _STDINC_H_
|
|
||||||
#define _STDINC_H_
|
|
||||||
|
|
||||||
#include <math.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <sys/times.h>
|
|
||||||
|
|
||||||
/*
 * ERROR: report a fatal condition and terminate the program.
 * Takes a printf-style format string followed by its arguments.  The
 * message goes to stderr and the process exits with status -1.
 * The do/while wrapper makes the expansion a single statement, so the
 * macro can be used as the body of an unbraced if/else.
 */
#define error(...) \
    do { \
        fprintf(stderr, __VA_ARGS__); \
        exit(-1); \
    } while (0)
|
|
||||||
/*
|
|
||||||
* STREAM: a replacement for FILE *.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef FILE *stream;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* BOOL, TRUE and FALSE: standard names for logical values.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef long bool;
|
|
||||||
|
|
||||||
#ifndef TRUE
|
|
||||||
|
|
||||||
#define FALSE 0
|
|
||||||
#define TRUE 1
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* BYTE: a short name for a handy chunk of bits.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef char byte;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* STRING: for null-terminated strings which are not taken apart.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef char *string;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* REAL: default type is double;
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef double real, *realptr;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* PROC, IPROC, RPROC: pointers to procedures, integer functions, and
|
|
||||||
* real-valued functions, respectively.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef void (*proced)();
|
|
||||||
typedef long (*iproc)();
|
|
||||||
typedef real (*rproc)();
|
|
||||||
|
|
||||||
/*
|
|
||||||
* LOCAL: declare something to be local to a file.
|
|
||||||
* PERMANENT: declare something to be permanent data within a function.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define local static
|
|
||||||
#define permanent static
|
|
||||||
|
|
||||||
/*
|
|
||||||
* STREQ: handy string-equality macro.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define streq(x,y) (strcmp((x), (y)) == 0)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* PI, etc. -- mathematical constants
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define PI 3.14159265358979323846
|
|
||||||
#define TWO_PI 6.28318530717958647693
|
|
||||||
#define FOUR_PI 12.56637061435917295385
|
|
||||||
#define HALF_PI 1.57079632679489661923
|
|
||||||
#define FRTHRD_PI 4.18879020478639098462
|
|
||||||
|
|
||||||
/*
|
|
||||||
* ABS: returns the absolute value of its argument
|
|
||||||
* MAX: returns the argument with the highest value
|
|
||||||
* MIN: returns the argument with the lowest value
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define ABS(x) (((x) < 0) ? -(x) : (x))
|
|
||||||
|
|
||||||
#include "vectmath.h"
|
|
||||||
#include "defs.h"
|
|
||||||
#include "code.h"
|
|
||||||
#include "util.h"
|
|
||||||
#include "load.h"
|
|
||||||
#include "code_io.h"
|
|
||||||
#include "grav.h"
|
|
||||||
#include "getparam.h"
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,71 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
EXTERN_ENV
|
|
||||||
#define global extern
|
|
||||||
|
|
||||||
#include "stdinc.h"
|
|
||||||
|
|
||||||
#define HZ 60.0
|
|
||||||
#define MULT 1103515245
|
|
||||||
#define ADD 12345
|
|
||||||
#define MASK (0x7FFFFFFF)
|
|
||||||
#define TWOTO31 2147483648.0
|
|
||||||
|
|
||||||
local long A = 1;
|
|
||||||
local long B = 0;
|
|
||||||
local long randx = 1;
|
|
||||||
local long lastrand; /* the last random number */
|
|
||||||
|
|
||||||
/*
 * XRAND: generate floating-point random number.
 * Scales one draw of prand() from [0, 1.0) onto the range starting at
 * xl with width (xh - xl).
 */

double xrand(double xl, double xh)
{
   double width = xh - xl;

   return xl + width * prand();
}
|
|
||||||
|
|
||||||
void pranset(long seed)
|
|
||||||
{
|
|
||||||
A = 1;
|
|
||||||
B = 0;
|
|
||||||
randx = (A*seed+B) & MASK;
|
|
||||||
A = (MULT * A) & MASK;
|
|
||||||
B = (MULT*B + ADD) & MASK;
|
|
||||||
}
|
|
||||||
|
|
||||||
double prand()
|
|
||||||
/*
|
|
||||||
Return a random double in [0, 1.0)
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
lastrand = randx;
|
|
||||||
randx = (A*randx+B) & MASK;
|
|
||||||
return((double)lastrand/TWOTO31);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* CPUTIME: compute CPU time in min.
|
|
||||||
*/
|
|
||||||
double cputime()
|
|
||||||
{
|
|
||||||
struct tms buffer;
|
|
||||||
|
|
||||||
if (times(&buffer) == (clock_t)-1)
|
|
||||||
error("times() call failed\n");
|
|
||||||
return (buffer.tms_utime / (60.0 * HZ));
|
|
||||||
}
|
|
|
@ -1,9 +0,0 @@
|
||||||
#ifndef _UTIL_H_
|
|
||||||
#define _UTIL_H_
|
|
||||||
|
|
||||||
double xrand(double xl, double xh);
|
|
||||||
void pranset(long seed);
|
|
||||||
double prand(void);
|
|
||||||
double cputime(void);
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,305 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* VECTMATH.H: include file for vector/matrix operations.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _VECMATH_H_
|
|
||||||
#define _VECMATH_H_
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define NDIM 3L
|
|
||||||
|
|
||||||
typedef real vector[NDIM], matrix[NDIM][NDIM];
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Vector operations.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define CLRV(v) /* CLeaR Vector */ \
|
|
||||||
{ \
|
|
||||||
register long _i; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
(v)[_i] = 0.0; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define UNITV(v,j) /* UNIT Vector */ \
|
|
||||||
{ \
|
|
||||||
register long _i; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
(v)[_i] = (_i == (j) ? 1.0 : 0.0); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SETV(v,u) /* SET Vector */ \
|
|
||||||
{ \
|
|
||||||
register long _i; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
(v)[_i] = (u)[_i]; \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#define ADDV(v,u,w) /* ADD Vector */ \
|
|
||||||
{ \
|
|
||||||
register real *_vp = (v), *_up = (u), *_wp = (w); \
|
|
||||||
*_vp++ = (*_up++) + (*_wp++); \
|
|
||||||
*_vp++ = (*_up++) + (*_wp++); \
|
|
||||||
*_vp = (*_up ) + (*_wp ); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SUBV(v,u,w) /* SUBtract Vector */ \
|
|
||||||
{ \
|
|
||||||
register real *_vp = (v), *_up = (u), *_wp = (w); \
|
|
||||||
*_vp++ = (*_up++) - (*_wp++); \
|
|
||||||
*_vp++ = (*_up++) - (*_wp++); \
|
|
||||||
*_vp = (*_up ) - (*_wp ); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define MULVS(v,u,s) /* MULtiply Vector by Scalar */ \
|
|
||||||
{ \
|
|
||||||
register real *_vp = (v), *_up = (u); \
|
|
||||||
*_vp++ = (*_up++) * (s); \
|
|
||||||
*_vp++ = (*_up++) * (s); \
|
|
||||||
*_vp = (*_up ) * (s); \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#define DIVVS(v,u,s) /* DIVide Vector by Scalar */ \
|
|
||||||
{ \
|
|
||||||
register long _i; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
(v)[_i] = (u)[_i] / (s); \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#define DOTVP(s,v,u) /* DOT Vector Product */ \
|
|
||||||
{ \
|
|
||||||
register real *_vp = (v), *_up = (u); \
|
|
||||||
(s) = (*_vp++) * (*_up++); \
|
|
||||||
(s) += (*_vp++) * (*_up++); \
|
|
||||||
(s) += (*_vp ) * (*_up ); \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * ABSV(s,v): store the Euclidean length of vector v in s.
 * sqrt is declared with a full prototype so that the macro stays usable
 * without math.h and does not clash with the library declaration.
 */
#define ABSV(s,v)			/* ABSolute value of a Vector */ \
{									\
    double _tmp, sqrt(double);						\
    register long _i;							\
    _tmp = 0.0;								\
    for (_i = 0; _i < NDIM; _i++)					\
        _tmp += (v)[_i] * (v)[_i];					\
    (s) = sqrt(_tmp);							\
}
|
|
||||||
|
|
||||||
/*
 * DISTV(s,u,v): store the Euclidean distance between vectors u and v in s.
 * sqrt is declared with a full prototype so that the macro stays usable
 * without math.h and does not clash with the library declaration.
 */
#define DISTV(s,u,v)			/* DISTance between Vectors */ \
{									\
    double _tmp, sqrt(double);						\
    register long _i;							\
    _tmp = 0.0;								\
    for (_i = 0; _i < NDIM; _i++)					\
        _tmp += ((u)[_i]-(v)[_i]) * ((u)[_i]-(v)[_i]);			\
    (s) = sqrt(_tmp);							\
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define CROSSVP(v,u,w) /* CROSS Vector Product */ \
|
|
||||||
{ \
|
|
||||||
(v)[0] = (u)[1]*(w)[2] - (u)[2]*(w)[1]; \
|
|
||||||
(v)[1] = (u)[2]*(w)[0] - (u)[0]*(w)[2]; \
|
|
||||||
(v)[2] = (u)[0]*(w)[1] - (u)[1]*(w)[0]; \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#define INCADDV(v,u) /* INCrementally ADD Vector */ \
|
|
||||||
{ \
|
|
||||||
register long _i; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
(v)[_i] += (u)[_i]; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define INCSUBV(v,u) /* INCrementally SUBtract Vector */ \
|
|
||||||
{ \
|
|
||||||
register long _i; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
(v)[_i] -= (u)[_i]; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define INCMULVS(v,s) /* INCrementally MULtiply Vector by Scalar */ \
|
|
||||||
{ \
|
|
||||||
register long _i; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
(v)[_i] *= (s); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define INCDIVVS(v,s) /* INCrementally DIVide Vector by Scalar */ \
|
|
||||||
{ \
|
|
||||||
register long _i; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
(v)[_i] /= (s); \
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Matrix operations.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define CLRM(p) /* CLeaR Matrix */ \
|
|
||||||
{ \
|
|
||||||
register long _i, _j; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
for (_j = 0; _j < NDIM; _j++) \
|
|
||||||
(p)[_i][_j] = 0.0; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SETMI(p) /* SET Matrix to Identity */ \
|
|
||||||
{ \
|
|
||||||
register long _i, _j; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
for (_j = 0; _j < NDIM; _j++) \
|
|
||||||
(p)[_i][_j] = (_i == _j ? 1.0 : 0.0); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SETM(p,q) /* SET Matrix */ \
|
|
||||||
{ \
|
|
||||||
register long _i, _j; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
for (_j = 0; _j < NDIM; _j++) \
|
|
||||||
(p)[_i][_j] = (q)[_i][_j]; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define TRANM(p,q) /* TRANspose Matrix */ \
|
|
||||||
{ \
|
|
||||||
register long _i, _j; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
for (_j = 0; _j < NDIM; _j++) \
|
|
||||||
(p)[_i][_j] = (q)[_j][_i]; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define ADDM(p,q,r) /* ADD Matrix */ \
|
|
||||||
{ \
|
|
||||||
register long _i, _j; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
for (_j = 0; _j < NDIM; _j++) \
|
|
||||||
(p)[_i][_j] = (q)[_i][_j] + (r)[_i][_j]; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SUBM(p,q,r) /* SUBtract Matrix */ \
|
|
||||||
{ \
|
|
||||||
register long _i, _j; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
for (_j = 0; _j < NDIM; _j++) \
|
|
||||||
(p)[_i][_j] = (q)[_i][_j] - (r)[_i][_j]; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define MULM(p,q,r) /* Multiply Matrix */ \
|
|
||||||
{ \
|
|
||||||
register long _i, _j, _k; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
for (_j = 0; _j < NDIM; _j++) { \
|
|
||||||
(p)[_i][_j] = 0.0; \
|
|
||||||
for (_k = 0; _k < NDIM; _k++) \
|
|
||||||
(p)[_i][_j] += (q)[_i][_k] * (r)[_k][_j]; \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define MULMS(p,q,s) /* MULtiply Matrix by Scalar */ \
|
|
||||||
{ \
|
|
||||||
register long _i, _j; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
for (_j = 0; _j < NDIM; _j++) \
|
|
||||||
(p)[_i][_j] = (q)[_i][_j] * (s); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define DIVMS(p,q,s) /* DIVide Matrix by Scalar */ \
|
|
||||||
{ \
|
|
||||||
register long _i, _j; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
for (_j = 0; _j < NDIM; _j++) \
|
|
||||||
(p)[_i][_j] = (q)[_i][_j] / (s); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define MULMV(v,p,u) /* MULtiply Matrix by Vector */ \
|
|
||||||
{ \
|
|
||||||
register long _i, _j; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) { \
|
|
||||||
(v)[_i] = 0.0; \
|
|
||||||
for (_j = 0; _j < NDIM; _j++) \
|
|
||||||
(v)[_i] += (p)[_i][_j] * (u)[_j]; \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define OUTVP(p,v,u) /* OUTer Vector Product */ \
|
|
||||||
{ \
|
|
||||||
register long _i, _j; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
for (_j = 0; _j < NDIM; _j++) \
|
|
||||||
(p)[_i][_j] = (v)[_i] * (u)[_j]; \
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * TRACEM(s,p): store the sum of the diagonal elements of matrix p in s.
 */
#define TRACEM(s,p)			/* TRACE of Matrix */ \
{									\
    register long _i;							\
    (s) = 0.0;								\
    for (_i = 0; _i < NDIM; _i++)					\
        (s) += (p)[_i][_i];						\
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Misc. impure operations.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define SETVS(v,s) /* SET Vector to Scalar */ \
|
|
||||||
{ \
|
|
||||||
register long _i; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
(v)[_i] = (s); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define ADDVS(v,u,s) /* ADD Vector and Scalar */ \
|
|
||||||
{ \
|
|
||||||
register long _i; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
(v)[_i] = (u)[_i] + (s); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SETMS(p,s) /* SET Matrix to Scalar */ \
|
|
||||||
{ \
|
|
||||||
register long _i, _j; \
|
|
||||||
for (_i = 0; _i < NDIM; _i++) \
|
|
||||||
for (_j = 0; _j < NDIM; _j++) \
|
|
||||||
(p)[_i][_j] = (s); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define PRTV(name, vec) /* PRinT Vector */ \
|
|
||||||
{ \
|
|
||||||
fprintf(stdout,"%s = [%9.4f,%9.4f,%9.4f] ",name,vec[0],vec[1],vec[2]); \
|
|
||||||
}
|
|
||||||
#define PRIV(name, vec) /* PRint Integer Vector */ \
|
|
||||||
{ \
|
|
||||||
fprintf(stdout,"%s = [%d,%d,%d] ",name,vec[0],vec[1],vec[2]); \
|
|
||||||
}
|
|
||||||
#define PROV(name, vec) /* PRint Integer Vector */ \
|
|
||||||
{ \
|
|
||||||
fprintf(stdout,"%s = [%o,%o,%o] ",name,vec[0],vec[1],vec[2]); \
|
|
||||||
}
|
|
||||||
#define PRHV(name, vec) /* PRint Integer Vector */ \
|
|
||||||
{ \
|
|
||||||
fprintf(stdout,"%s = [%x,%x,%x] ",name,vec[0],vec[1],vec[2]); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,26 +0,0 @@
|
||||||
TARGET = FMM
|
|
||||||
OBJS = box.o construct_grid.o cost_zones.o defs.o fmm.o interactions.o memory.o particle.o partition_grid.o
|
|
||||||
|
|
||||||
include ../../Makefile.config
|
|
||||||
|
|
||||||
defs.c: defs.h
|
|
||||||
box.c: box.h
|
|
||||||
construct_grid.c: construct_grid.h
|
|
||||||
cost_zones.c: cost_zones.h
|
|
||||||
interactions.c: interactions.h
|
|
||||||
memory.c: memory.h
|
|
||||||
particle.c: particle.h
|
|
||||||
partition_grid.c: partition_grid.h
|
|
||||||
|
|
||||||
fmm.o: fmm.C defs.h memory.h particle.h box.h partition_grid.h cost_zones.h construct_grid.h interactions.h
|
|
||||||
interactions.o: interactions.C defs.h memory.h particle.h box.h partition_grid.h interactions.h
|
|
||||||
shell.o: shell.C defs.h memory.h particle.h box.h partition_grid.h interactions.h
|
|
||||||
construct_grid.o: construct_grid.C defs.h memory.h particle.h box.h partition_grid.h construct_grid.h
|
|
||||||
cost_zones.o: cost_zones.C defs.h memory.h box.h partition_grid.h cost_zones.h
|
|
||||||
#orb.o: orb.C defs.h memory.h box.h partition_grid.h orb.h
|
|
||||||
partition_grid.o: partition_grid.C defs.h memory.h box.h partition_grid.h
|
|
||||||
box.o: box.C defs.h memory.h particle.h box.h
|
|
||||||
particle.o: particle.C defs.h memory.h particle.h
|
|
||||||
memory.o: memory.C defs.h memory.h
|
|
||||||
defs.o: defs.C defs.h memory.h
|
|
||||||
memory.h: defs.h particle.h box.h
|
|
|
@ -1,39 +0,0 @@
|
||||||
GENERAL INFORMATION:
|
|
||||||
|
|
||||||
The FMM application implements a parallel adaptive Fast Multipole Method
|
|
||||||
to simulate the interaction of a system of bodies (N-body problem). A
|
|
||||||
description of this implementation can be found in:
|
|
||||||
|
|
||||||
Singh, J. P., et al. A Parallel Adaptive Fast Multipole Method.
|
|
||||||
Proceedings of Supercomputing 93, November 1993.
|
|
||||||
|
|
||||||
RUNNING THE PROGRAM:
|
|
||||||
|
|
||||||
To see how to run the program, please see the comment at the top of the
|
|
||||||
file fmm.C, or run the application with the "-h" command line option.
|
|
||||||
Optional command line parameters allow for individual processor timing
|
|
||||||
statistics to be printed out, as well as the final particle positions.
|
|
||||||
Input parameters can be placed in an input file and redirected through
|
|
||||||
standard input. Of the nine input parameters, the ones which would
|
|
||||||
normally be changed are the number of particles and the number of
|
|
||||||
processors. If other parameters are changed, these changes should be
|
|
||||||
reported in any results that are presented. Sample input files are
|
|
||||||
included in the inputs subdirectory.
|
|
||||||
|
|
||||||
Sample output for a 1 processor run with the input file inputs/input.256
|
|
||||||
and the timing and output flags specified is contained in the file
|
|
||||||
correct.out.
|
|
||||||
|
|
||||||
BASE PROBLEM SIZE:
|
|
||||||
|
|
||||||
The base problem size for an up-to-64-processor machine is 16,384
|
|
||||||
particles. For this many particles, you can use the input file provided
|
|
||||||
(and change only the number of processors).
|
|
||||||
|
|
||||||
DATA DISTRIBUTION:
|
|
||||||
|
|
||||||
Our "POSSIBLE ENHANCEMENT" comments in the source code tell where one
|
|
||||||
might want to distribute data and how. Data distribution, however, does
|
|
||||||
not make much difference to performance on the Stanford DASH
|
|
||||||
multiprocessor.
|
|
||||||
|
|
|
@ -1,367 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "defs.h"
#include "memory.h"
#include "particle.h"
#include "box.h"
|
|
||||||
|
|
||||||
/* How many boxes can fit on one line */
|
|
||||||
#define BOXES_PER_LINE 4
|
|
||||||
#define TERMS_PER_LINE 2
|
|
||||||
|
|
||||||
box *Grid = NULL;
|
|
||||||
|
|
||||||
void ZeroBox(long my_id, box *b);
|
|
||||||
|
|
||||||
void
|
|
||||||
CreateBoxes (long my_id, long num_boxes)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
|
|
||||||
LOCK(G_Memory->mal_lock);
|
|
||||||
Local[my_id].B_Heap = (box *) G_MALLOC(num_boxes * sizeof(box));
|
|
||||||
|
|
||||||
/* POSSIBLE ENHANCEMENT: Here is where one might distribute the
|
|
||||||
B_Heap data across physically distributed memories as desired.
|
|
||||||
|
|
||||||
One way to do this is as follows:
|
|
||||||
|
|
||||||
char *starting_address;
|
|
||||||
char *ending_address;
|
|
||||||
|
|
||||||
starting_address = (char *) Local[my_id].B_Heap;
|
|
||||||
ending_address = (((char *) Local[my_id].B_Heap)
|
|
||||||
+ (num_boxes * sizeof(particle *)) - 1);
|
|
||||||
|
|
||||||
Place all addresses x such that (starting_address <= x < ending_address)
|
|
||||||
on node my_id
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
UNLOCK(G_Memory->mal_lock);
|
|
||||||
Local[my_id].Max_B_Heap = num_boxes;
|
|
||||||
Local[my_id].Index_B_Heap = 0;
|
|
||||||
|
|
||||||
for (i = 0; i < num_boxes; i++) {
|
|
||||||
Local[my_id].B_Heap[i].exp_lock_index = i % (MAX_LOCKS - 1);
|
|
||||||
Local[my_id].B_Heap[i].particle_lock_index = i % (MAX_LOCKS - 1);
|
|
||||||
Local[my_id].B_Heap[i].id = i + ((double) my_id / ID_LIMIT);
|
|
||||||
ZeroBox(my_id, &Local[my_id].B_Heap[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
FreeBoxes (long my_id)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
box *b_array;
|
|
||||||
|
|
||||||
b_array = Local[my_id].B_Heap;
|
|
||||||
for (i = 0; i < Local[my_id].Index_B_Heap; i++)
|
|
||||||
ZeroBox(my_id, &b_array[i]);
|
|
||||||
Local[my_id].Index_B_Heap = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
ZeroBox (long my_id, box *b)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
|
|
||||||
b->type = CHILDLESS;
|
|
||||||
b->num_particles = 0;
|
|
||||||
for (i = 0; i < MAX_PARTICLES_PER_BOX; i++)
|
|
||||||
b->particles[i] = NULL;
|
|
||||||
b->parent = NULL;
|
|
||||||
for (i = 0; i < NUM_OFFSPRING; i++) {
|
|
||||||
b->children[i] = NULL;
|
|
||||||
b->shadow[i] = NULL;
|
|
||||||
}
|
|
||||||
b->num_children = 0;
|
|
||||||
b->construct_synch = 0;
|
|
||||||
b->interaction_synch = 0;
|
|
||||||
b->cost = 0;
|
|
||||||
b->proc = my_id;
|
|
||||||
b->subtree_cost = 0;
|
|
||||||
b->next = NULL;
|
|
||||||
b->prev = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* InitBox (long my_id, real x_center, real y_center, real length, long level, box *parent)
|
|
||||||
*
|
|
||||||
* Args : the x_center and y_center of the center of the box;
|
|
||||||
* the length of the box;
|
|
||||||
* the level of the box;
|
|
||||||
* the address of b's parent.
|
|
||||||
*
|
|
||||||
* Returns : the address of the newly created box.
|
|
||||||
*
|
|
||||||
* Side Effects : Initializes num_particles to 0, all other pointers to NULL,
|
|
||||||
* and sets the box ID to a unique number. It also creates the space for
|
|
||||||
* the two expansion arrays.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
box *
|
|
||||||
InitBox (long my_id, real x_center, real y_center, real length, box *parent)
|
|
||||||
{
|
|
||||||
box *b;
|
|
||||||
|
|
||||||
if (Local[my_id].Index_B_Heap == Local[my_id].Max_B_Heap) {
|
|
||||||
LockedPrint("ERROR (P%d) : Ran out of boxes\n", my_id);
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
b = &Local[my_id].B_Heap[Local[my_id].Index_B_Heap++];
|
|
||||||
b->x_center = x_center;
|
|
||||||
b->y_center = y_center;
|
|
||||||
b->length = length;
|
|
||||||
b->parent = parent;
|
|
||||||
if (parent == NULL)
|
|
||||||
b->level = 0;
|
|
||||||
else
|
|
||||||
b->level = parent->level + 1;
|
|
||||||
return b;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* PrintBox (box *b)
|
|
||||||
*
|
|
||||||
* Args : the address of a box, b.
|
|
||||||
*
|
|
||||||
* Returns : nothing.
|
|
||||||
*
|
|
||||||
* Side Effects : Prints to stdout the information stored for b.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
PrintBox (box *b)
|
|
||||||
{
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
fflush(stdout);
|
|
||||||
if (b != NULL) {
|
|
||||||
printf("Info for B%f :\n", b->id);
|
|
||||||
printf(" X center = %.40g\n", b->x_center);
|
|
||||||
printf(" Y center = %.40g\n", b->y_center);
|
|
||||||
printf(" Length = %.40g\n", b->length);
|
|
||||||
printf(" Level = %ld\n", b->level);
|
|
||||||
printf(" Type = %d\n", b->type);
|
|
||||||
printf(" Child Num = %ld\n", b->child_num);
|
|
||||||
if (b->parent == NULL)
|
|
||||||
printf(" Parent = NONE\n");
|
|
||||||
else
|
|
||||||
printf(" Parent = B%f\n", b->parent->id);
|
|
||||||
printf(" Children's IDs : ");
|
|
||||||
if (b->num_children != 0)
|
|
||||||
PrintBoxArrayIds(b->children, b->num_children);
|
|
||||||
else
|
|
||||||
printf("NONE\n");
|
|
||||||
printf(" Sibling's IDs : ");
|
|
||||||
if (b->num_siblings != 0)
|
|
||||||
PrintBoxArrayIds(b->siblings, b->num_siblings);
|
|
||||||
else
|
|
||||||
printf("NONE\n");
|
|
||||||
printf(" Colleagues' IDs : ");
|
|
||||||
PrintBoxArrayIds(b->colleagues, b->num_colleagues);
|
|
||||||
printf(" U List IDs : ");
|
|
||||||
PrintBoxArrayIds(b->u_list, b->num_u_list);
|
|
||||||
printf(" V List IDs : ");
|
|
||||||
PrintBoxArrayIds(b->v_list, b->num_v_list);
|
|
||||||
printf(" W List IDs : ");
|
|
||||||
PrintBoxArrayIds(b->w_list, b->num_w_list);
|
|
||||||
printf(" # of Particles = %ld\n", b->num_particles);
|
|
||||||
printf(" Particles' IDs : ");
|
|
||||||
PrintParticleArrayIds(b->particles, b->num_particles);
|
|
||||||
printf(" Assigned Process ID : %ld\n", b->proc);
|
|
||||||
printf(" Cost : %ld\n", b->cost);
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
else
|
|
||||||
printf("Box has not been initialized yet.\n\n");
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* PrintBoxArrayIds (box_node *b_array[], long array_length)
|
|
||||||
*
|
|
||||||
* Args : the address of the box array, b_array;
|
|
||||||
* the length of the array, array_length.
|
|
||||||
*
|
|
||||||
* Returns : nothing.
|
|
||||||
*
|
|
||||||
* Side Effects : Prints to stdout just the id numbers for every box in
|
|
||||||
* b_array.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
PrintBoxArrayIds (box *b_array[], long array_length)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long tab_count;
|
|
||||||
|
|
||||||
tab_count = 0;
|
|
||||||
for (i = 0; i < array_length; i++) {
|
|
||||||
if (tab_count == 0) {
|
|
||||||
printf("\n");
|
|
||||||
tab_count = BOXES_PER_LINE;
|
|
||||||
}
|
|
||||||
if (b_array[i] != NULL)
|
|
||||||
printf("\tB%f", b_array[i]->id);
|
|
||||||
tab_count -= 1;
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* PrintExpansionTerms (real expansion[])
|
|
||||||
*
|
|
||||||
* Args : the array of expansion terms, expansion.
|
|
||||||
*
|
|
||||||
* Returns : nothing.
|
|
||||||
*
|
|
||||||
* Side Effects : Prints to stdout the contents of expansion.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
PrintExpansionTerms (complex expansion[])
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long tab_count = 0;
|
|
||||||
|
|
||||||
for (i = 0; i < Expansion_Terms; i++) {
|
|
||||||
if (tab_count == 0) {
|
|
||||||
printf("\n");
|
|
||||||
tab_count = TERMS_PER_LINE;
|
|
||||||
}
|
|
||||||
if (expansion[i].i >= (real) 0.0)
|
|
||||||
printf("\ta%ld = %.3e + %.3ei", i, expansion[i].r, expansion[i].i);
|
|
||||||
else
|
|
||||||
printf("\ta%ld = %.3e - %.3ei", i, expansion[i].r, -expansion[i].i);
|
|
||||||
tab_count -= 1;
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
ListIterate (long my_id, box *b, box **list, long length, list_function function)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
|
|
||||||
for (i = 0; i < length; i++) {
|
|
||||||
if (list[i] == NULL) {
|
|
||||||
LockedPrint("ERROR (P%d) : NULL list entry\n", my_id);
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
(*function)(my_id, list[i], b);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* AdjacentBoxes (box *b1, box *b2)
|
|
||||||
*
|
|
||||||
* Args : two potentially adjacent boxes, b1 and b2.
|
|
||||||
*
|
|
||||||
* Returns : TRUE, if boxes are adjacent, FALSE if not.
|
|
||||||
*
|
|
||||||
* Side Effects : none.
|
|
||||||
*
|
|
||||||
* Comments : Two boxes are adjacent if their centers are separated in either
|
|
||||||
* the x or y directions by (1/2 the length of b1) + (1/2 length of b2),
|
|
||||||
* and separated in the other direction by a distance less than or equal
|
|
||||||
* to (1/2 the length of b1) + (1/2 the length of b2).
|
|
||||||
*
|
|
||||||
* NOTE : By this definition, parents are NOT adjacent to their children.
|
|
||||||
*/
|
|
||||||
long
|
|
||||||
AdjacentBoxes (box *b1, box *b2)
|
|
||||||
{
|
|
||||||
real exact_separation;
|
|
||||||
real x_separation;
|
|
||||||
real y_separation;
|
|
||||||
long ret_val;
|
|
||||||
|
|
||||||
exact_separation = (b1->length / (real) 2.0) + (b2->length / (real) 2.0);
|
|
||||||
x_separation = (real) fabs((double)(b1->x_center - b2->x_center));
|
|
||||||
y_separation = (real) fabs((double)(b1->y_center - b2->y_center));
|
|
||||||
|
|
||||||
if ((x_separation == exact_separation) &&
|
|
||||||
(y_separation <= exact_separation))
|
|
||||||
ret_val = TRUE;
|
|
||||||
else
|
|
||||||
if ((y_separation == exact_separation) &&
|
|
||||||
(x_separation <= exact_separation))
|
|
||||||
ret_val = TRUE;
|
|
||||||
else
|
|
||||||
ret_val = FALSE;
|
|
||||||
|
|
||||||
return ret_val;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* WellSeparatedBoxes (box *b1, box *b2)
|
|
||||||
*
|
|
||||||
* Args : Two potentially well separated boxes, b1 and b2.
|
|
||||||
*
|
|
||||||
* Returns : TRUE, if the two boxes are well separated, and FALSE if not.
|
|
||||||
*
|
|
||||||
* Side Effects : none.
|
|
||||||
*
|
|
||||||
* Comments : Well separated means that the two boxes are separated by the
|
|
||||||
* length of the boxes. If one of the boxes is bigger than the other,
|
|
||||||
* the smaller box is given the length of the larger box. This means
|
|
||||||
* that the centers of the two boxes, regardless of their relative size,
|
|
||||||
* must be separated in the x or y direction (or both) by at least
|
|
||||||
* twice the length of the biggest box.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
long
|
|
||||||
WellSeparatedBoxes (box *b1, box *b2)
|
|
||||||
{
|
|
||||||
real min_ws_distance;
|
|
||||||
real x_separation;
|
|
||||||
real y_separation;
|
|
||||||
long ret_val;
|
|
||||||
|
|
||||||
if (b1->length > b2->length)
|
|
||||||
min_ws_distance = b1->length * (real) 2.0;
|
|
||||||
else
|
|
||||||
min_ws_distance = b2->length * (real) 2.0;
|
|
||||||
|
|
||||||
x_separation = (real) fabs((double)(b1->x_center - b2->x_center));
|
|
||||||
y_separation = (real) fabs((double)(b1->y_center - b2->y_center));
|
|
||||||
|
|
||||||
if ((x_separation >= min_ws_distance) || (y_separation >= min_ws_distance))
|
|
||||||
ret_val = TRUE;
|
|
||||||
else
|
|
||||||
ret_val = FALSE;
|
|
||||||
|
|
||||||
return ret_val;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#undef BOXES_PER_LINE
|
|
||||||
#undef TERMS_PER_LINE
|
|
||||||
|
|
|
@ -1,134 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#ifndef _Box_H
|
|
||||||
#define _Box_H 1
|
|
||||||
|
|
||||||
#include "defs.h"
|
|
||||||
#include "particle.h"
|
|
||||||
|
|
||||||
/* This definition sets the maximum number of particles allowed per box. */
#define MAX_PARTICLES_PER_BOX 40

/* This definition sets the number of subdivisions (offspring) of a box. */
#define NUM_OFFSPRING 4

/* Capacities of the fixed-size box-pointer arrays and expansion arrays
   declared in struct _Box. */
#define MAX_SIBLINGS (NUM_OFFSPRING - 1)
#define MAX_COLLEAGUES 8
#define MAX_U_LIST 20
#define MAX_V_LIST 27
#define MAX_W_LIST 30
#define MAX_EXPANSION_TERMS 40

typedef struct _Box box;
typedef struct _Box_Node box_node;

/* Callback type taken by ListIterate, which calls it once per list entry
   as function(my_id, list[i], b). */
typedef void (*list_function)(long my_id, box *list_box, box *b);

/* ZeroBox resets every box to CHILDLESS. */
typedef enum { CHILDLESS, PARENT } box_type;

/* Divisor used by CreateBoxes to encode the creating processor in the
   fractional part of a box id: id = index + my_id / ID_LIMIT. */
#define ID_LIMIT 1000000
|
|
||||||
|
|
||||||
/* Every box has :
 *   1.  A unique ID number (made up of a unique ID number per processor plus
 *       the ID of the processor that created the box)
 *   2.- 3.  An x and y position for its center
 *   4.  The length of the box (measured as the length of one of its sides)
 *   5.  The level of ancestry of the box (how many parents do you have to
 *       visit before the first box is found?)
 *   6.  The number of particles in the box
 *   7.  A list of those particles
 *   8.  A pointer to its parent
 *   9.  The number of children
 *  10.  A list of its children
 *  11.  The number of siblings
 *  12.  A list of its siblings
 *  13.  A fixed-size array of its colleagues
 *  14.  A fixed-size array representing list 1 in RR #496 (u_list)
 *  15.  A fixed-size array representing list 2 in RR #496 (v_list)
 *  16.  A fixed-size array representing list 3 in RR #496 (w_list)
 *  17.  An array of its multipole expansion terms.
 *  18.  An array of its local expansion terms.
 *  19.  The id of the processor that is working on the box.
 *  20.  The amount of computational work associated with the box.
 *
 * Each box-pointer array is paired with a num_* field holding its count.
 */

struct _Box
{
   double id;                 /* index + my_id / ID_LIMIT, set by CreateBoxes */
   real x_center;
   real y_center;
   real length;
   long level;                /* 0 when parent is NULL, else parent->level + 1 */
   box_type type;
   particle *particles[MAX_PARTICLES_PER_BOX + 1];   /* one slot beyond the per-box maximum */
   long num_particles;
   box *parent;
   long child_num;
   box *shadow[NUM_OFFSPRING];
   box *children[NUM_OFFSPRING];
   long num_children;
   box *siblings[MAX_SIBLINGS];
   long num_siblings;
   box *colleagues[MAX_COLLEAGUES];
   long num_colleagues;
   box *u_list[MAX_U_LIST];
   long num_u_list;
   box *v_list[MAX_V_LIST];
   long num_v_list;
   box *w_list[MAX_W_LIST];
   long num_w_list;
   complex mp_expansion[MAX_EXPANSION_TERMS];
   complex local_expansion[MAX_EXPANSION_TERMS];
   complex x_expansion[MAX_EXPANSION_TERMS];
   long exp_lock_index;       /* index into G_Memory->lock_array; set to i % (MAX_LOCKS - 1) */
   long particle_lock_index;  /* set to i % (MAX_LOCKS - 1) by CreateBoxes */
   volatile long construct_synch;
   volatile long interaction_synch;   /* bumped by each child; the parent spins until it equals num_children */
   long proc;                 /* set to the caller's my_id by ZeroBox */
   long cost;
   long u_cost;
   long v_cost;
   long w_cost;
   long p_cost;
   long subtree_cost;         /* this box's cost plus its children's subtree costs */
   box *next;
   box *prev;
   box *link1;
   box *link2;
};
|
|
||||||
|
|
||||||
|
|
||||||
/* This structure is used for a linked list of boxes: each node points at
   one box (data) and at the following node (next). */
struct _Box_Node
{
   box *data;
   struct _Box_Node *next;
};
|
|
||||||
|
|
||||||
/* Defined in the box implementation file, where it is initialized to NULL.
   CostZones reads the total work from Grid->subtree_cost. */
extern box *Grid;

/* Per-processor box heap management. */
extern void CreateBoxes(long my_id, long num_boxes);
extern void FreeBoxes(long my_id);
extern box *InitBox(long my_id, real x_center, real y_center, real length, box *parent);

/* Debug printing to stdout. */
extern void PrintBox(box *b);
extern void PrintBoxArrayIds(box *b_array[], long array_length);
extern void PrintExpansionTerms(complex expansion[]);

/* List traversal and geometric predicates (the predicates return TRUE/FALSE). */
extern void ListIterate(long my_id, box *b, box **list, long length, list_function function);
extern long AdjacentBoxes(box *b1, box *b2);
extern long WellSeparatedBoxes(box *b1, box *b2);
|
|
||||||
|
|
||||||
#endif /* _Box_H */
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,25 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#ifndef _Construct_Grid_H
|
|
||||||
#define _Construct_Grid_H 1
|
|
||||||
|
|
||||||
/* Grid entry points; my_id is the calling processor's id.
   NOTE(review): time_info is not declared in this header, so it presumably
   must be declared by an earlier include - confirm. */
extern void ConstructGrid(long my_id, time_info *local_time, long time_all);
extern void ConstructLists(long my_id, time_info *local_time, long time_all);
extern void DestroyGrid(long my_id, time_info *local_time, long time_all);
extern void PrintGrid(long my_id);
|
|
||||||
|
|
||||||
#endif /* _Construct_Grid_H */
|
|
|
@ -1,279 +0,0 @@
|
||||||
Expected output for input parameters from file inputs/input.256 with
|
|
||||||
output and timing flags specified:
|
|
||||||
|
|
||||||
|
|
||||||
Creating a two cluster, non uniform distribution for 256 particles
|
|
||||||
Starting FMM with 1 processor
|
|
||||||
Finished FMM
|
|
||||||
PROCESS STATISTICS
|
|
||||||
Track Tree List Part Pass Inter Bar Intra Other
|
|
||||||
Proc Time Time Time Time Time Time Time Time Time
|
|
||||||
0 0 0 0 0 0 0 0 0 0
|
|
||||||
|
|
||||||
TIMING INFORMATION
|
|
||||||
Start time : 782441462
|
|
||||||
Initialization finish time : 782441462
|
|
||||||
Overall finish time : 782441462
|
|
||||||
Total time with initialization : 0
|
|
||||||
Total time without initialization : 0
|
|
||||||
|
|
||||||
Total time for steps 3 to 5 : 0
|
|
||||||
|
|
||||||
PARTICLE POSITIONS
|
|
||||||
|
|
||||||
P 0 : Pos = ( -1.43411, -2.09109)
|
|
||||||
P 1 : Pos = ( -1.54648, -0.68562)
|
|
||||||
P 2 : Pos = ( -0.64031, 0.16307)
|
|
||||||
P 3 : Pos = ( 0.05947, -0.66152)
|
|
||||||
P 4 : Pos = ( -0.70371, -2.32670)
|
|
||||||
P 5 : Pos = ( -0.89875, -1.48421)
|
|
||||||
P 6 : Pos = ( -1.54906, -0.62613)
|
|
||||||
P 7 : Pos = ( 0.69844, 1.10018)
|
|
||||||
P 8 : Pos = ( -0.54329, -1.61378)
|
|
||||||
P 9 : Pos = ( -0.08989, -2.35112)
|
|
||||||
P 10 : Pos = ( -1.33669, -0.33496)
|
|
||||||
P 11 : Pos = ( -0.19485, -0.99712)
|
|
||||||
P 12 : Pos = ( -1.08720, -0.97597)
|
|
||||||
P 13 : Pos = ( -0.88950, -1.78681)
|
|
||||||
P 14 : Pos = ( -1.92000, -2.86092)
|
|
||||||
P 15 : Pos = ( 0.01118, -1.11592)
|
|
||||||
P 16 : Pos = ( 0.29956, -2.20402)
|
|
||||||
P 17 : Pos = ( -1.73261, -0.70957)
|
|
||||||
P 18 : Pos = ( 0.72752, 0.09104)
|
|
||||||
P 19 : Pos = ( -1.76686, -1.05004)
|
|
||||||
P 20 : Pos = ( -0.62225, -0.90594)
|
|
||||||
P 21 : Pos = ( -0.94715, -1.58968)
|
|
||||||
P 22 : Pos = ( -0.03266, -1.04470)
|
|
||||||
P 23 : Pos = ( -0.06388, -0.13640)
|
|
||||||
P 24 : Pos = ( -1.66374, -1.53045)
|
|
||||||
P 25 : Pos = ( -1.77486, -1.16405)
|
|
||||||
P 26 : Pos = ( -2.39597, -1.45824)
|
|
||||||
P 27 : Pos = ( -1.61490, -1.31192)
|
|
||||||
P 28 : Pos = ( -0.11696, -0.87015)
|
|
||||||
P 29 : Pos = ( -0.48613, -1.49041)
|
|
||||||
P 30 : Pos = ( -1.46477, -2.54577)
|
|
||||||
P 31 : Pos = ( -1.09388, -1.07751)
|
|
||||||
P 32 : Pos = ( -1.46100, -1.17696)
|
|
||||||
P 33 : Pos = ( -0.18779, -0.55415)
|
|
||||||
P 34 : Pos = ( 0.23057, 0.06852)
|
|
||||||
P 35 : Pos = ( -0.21021, -0.61339)
|
|
||||||
P 36 : Pos = ( -2.96834, -0.29539)
|
|
||||||
P 37 : Pos = ( -1.33559, -0.77310)
|
|
||||||
P 38 : Pos = ( -1.45288, -0.58590)
|
|
||||||
P 39 : Pos = ( -2.82349, -1.92800)
|
|
||||||
P 40 : Pos = ( -0.24231, -2.37768)
|
|
||||||
P 41 : Pos = ( -0.52698, -0.93986)
|
|
||||||
P 42 : Pos = ( -0.51194, -0.67306)
|
|
||||||
P 43 : Pos = ( -1.46181, -0.29485)
|
|
||||||
P 44 : Pos = ( -1.51229, -1.06319)
|
|
||||||
P 45 : Pos = ( -0.79540, -1.29090)
|
|
||||||
P 46 : Pos = ( -0.57868, -0.09248)
|
|
||||||
P 47 : Pos = ( -0.33401, -2.44589)
|
|
||||||
P 48 : Pos = ( -0.01213, -1.26073)
|
|
||||||
P 49 : Pos = ( -1.38279, -0.56367)
|
|
||||||
P 50 : Pos = ( -1.90767, -1.59006)
|
|
||||||
P 51 : Pos = ( -1.10363, -0.76771)
|
|
||||||
P 52 : Pos = ( -1.05322, -0.70645)
|
|
||||||
P 53 : Pos = ( -0.43662, -1.06196)
|
|
||||||
P 54 : Pos = ( -0.99445, 0.41950)
|
|
||||||
P 55 : Pos = ( -1.08575, -0.74978)
|
|
||||||
P 56 : Pos = ( -1.54303, -1.20363)
|
|
||||||
P 57 : Pos = ( -1.23607, -1.10169)
|
|
||||||
P 58 : Pos = ( -0.88698, -1.96075)
|
|
||||||
P 59 : Pos = ( -2.91866, -2.03021)
|
|
||||||
P 60 : Pos = ( 1.53310, -1.33704)
|
|
||||||
P 61 : Pos = ( -0.49453, -1.23276)
|
|
||||||
P 62 : Pos = ( -1.73482, 0.07055)
|
|
||||||
P 63 : Pos = ( -1.05005, -0.24271)
|
|
||||||
P 64 : Pos = ( -1.31181, -1.24434)
|
|
||||||
P 65 : Pos = ( -1.79726, 1.62154)
|
|
||||||
P 66 : Pos = ( -1.56322, -1.17794)
|
|
||||||
P 67 : Pos = ( -1.49587, -1.82000)
|
|
||||||
P 68 : Pos = ( -0.25687, -1.80248)
|
|
||||||
P 69 : Pos = ( -0.87176, -0.54912)
|
|
||||||
P 70 : Pos = ( -2.08087, -2.36369)
|
|
||||||
P 71 : Pos = ( -2.77277, -0.13150)
|
|
||||||
P 72 : Pos = ( -0.88101, -1.35167)
|
|
||||||
P 73 : Pos = ( -0.71981, -0.94017)
|
|
||||||
P 74 : Pos = ( 0.11335, -0.56285)
|
|
||||||
P 75 : Pos = ( -0.93693, -1.23228)
|
|
||||||
P 76 : Pos = ( -1.55307, -1.34658)
|
|
||||||
P 77 : Pos = ( -0.37223, -1.32314)
|
|
||||||
P 78 : Pos = ( -0.74784, -0.12910)
|
|
||||||
P 79 : Pos = ( -0.17029, -2.23523)
|
|
||||||
P 80 : Pos = ( -2.19951, 1.85571)
|
|
||||||
P 81 : Pos = ( -1.83973, -1.29899)
|
|
||||||
P 82 : Pos = ( 0.46179, -1.83450)
|
|
||||||
P 83 : Pos = ( -0.56821, -1.48287)
|
|
||||||
P 84 : Pos = ( -1.52386, -1.91689)
|
|
||||||
P 85 : Pos = ( -0.55720, -1.03627)
|
|
||||||
P 86 : Pos = ( -1.02957, -3.71620)
|
|
||||||
P 87 : Pos = ( -4.08440, 1.61353)
|
|
||||||
P 88 : Pos = ( -3.78035, 0.03563)
|
|
||||||
P 89 : Pos = ( -0.54816, -1.41596)
|
|
||||||
P 90 : Pos = ( -1.94317, -0.41962)
|
|
||||||
P 91 : Pos = ( -1.08228, -0.90690)
|
|
||||||
P 92 : Pos = ( -0.89155, -0.63790)
|
|
||||||
P 93 : Pos = ( -0.92724, -0.71553)
|
|
||||||
P 94 : Pos = ( -0.96707, -1.42639)
|
|
||||||
P 95 : Pos = ( -1.39903, -1.16314)
|
|
||||||
P 96 : Pos = ( -0.57333, -0.74190)
|
|
||||||
P 97 : Pos = ( -1.38755, -0.99592)
|
|
||||||
P 98 : Pos = ( 1.85628, 1.54037)
|
|
||||||
P 99 : Pos = ( -0.97737, -0.57102)
|
|
||||||
P 100 : Pos = ( -0.50769, -1.60342)
|
|
||||||
P 101 : Pos = ( -0.84604, -1.55463)
|
|
||||||
P 102 : Pos = ( 0.21192, -0.95452)
|
|
||||||
P 103 : Pos = ( -0.51392, -0.74877)
|
|
||||||
P 104 : Pos = ( -0.81335, -1.56088)
|
|
||||||
P 105 : Pos = ( -1.49047, -1.33111)
|
|
||||||
P 106 : Pos = ( -1.01388, -1.32191)
|
|
||||||
P 107 : Pos = ( -3.12680, -0.02822)
|
|
||||||
P 108 : Pos = ( -1.46754, -0.10543)
|
|
||||||
P 109 : Pos = ( -0.97791, -2.03745)
|
|
||||||
P 110 : Pos = ( 0.92297, -1.48565)
|
|
||||||
P 111 : Pos = ( -1.35069, -1.72285)
|
|
||||||
P 112 : Pos = ( 0.49270, -0.67037)
|
|
||||||
P 113 : Pos = ( -1.63986, -1.04857)
|
|
||||||
P 114 : Pos = ( -0.35524, -1.12787)
|
|
||||||
P 115 : Pos = ( -1.72972, 0.63613)
|
|
||||||
P 116 : Pos = ( -1.84838, -0.34173)
|
|
||||||
P 117 : Pos = ( -1.57914, -0.95206)
|
|
||||||
P 118 : Pos = ( -0.55701, -0.85381)
|
|
||||||
P 119 : Pos = ( -1.73082, 0.12909)
|
|
||||||
P 120 : Pos = ( -0.81612, -1.19132)
|
|
||||||
P 121 : Pos = ( -0.88562, 0.04683)
|
|
||||||
P 122 : Pos = ( -1.17066, -0.85713)
|
|
||||||
P 123 : Pos = ( -0.58563, -0.56109)
|
|
||||||
P 124 : Pos = ( -2.95537, -3.89308)
|
|
||||||
P 125 : Pos = ( -0.34982, -1.29778)
|
|
||||||
P 126 : Pos = ( -0.46937, -1.15248)
|
|
||||||
P 127 : Pos = ( -0.17294, -0.58438)
|
|
||||||
P 128 : Pos = ( 0.56589, -0.09110)
|
|
||||||
P 129 : Pos = ( 0.45352, 1.31437)
|
|
||||||
P 130 : Pos = ( 1.35968, 2.16306)
|
|
||||||
P 131 : Pos = ( 2.05947, 1.33847)
|
|
||||||
P 132 : Pos = ( 1.29629, -0.32670)
|
|
||||||
P 133 : Pos = ( 1.10125, 0.51578)
|
|
||||||
P 134 : Pos = ( 0.45094, 1.37386)
|
|
||||||
P 135 : Pos = ( 2.69843, 3.10017)
|
|
||||||
P 136 : Pos = ( 1.45671, 0.38621)
|
|
||||||
P 137 : Pos = ( 1.91010, -0.35113)
|
|
||||||
P 138 : Pos = ( 0.66331, 1.66503)
|
|
||||||
P 139 : Pos = ( 1.80515, 1.00288)
|
|
||||||
P 140 : Pos = ( 0.91279, 1.02402)
|
|
||||||
P 141 : Pos = ( 1.11050, 0.21320)
|
|
||||||
P 142 : Pos = ( 0.07999, -0.86092)
|
|
||||||
P 143 : Pos = ( 2.01118, 0.88407)
|
|
||||||
P 144 : Pos = ( 2.29956, -0.20402)
|
|
||||||
P 145 : Pos = ( 0.26739, 1.29042)
|
|
||||||
P 146 : Pos = ( 2.72752, 2.09102)
|
|
||||||
P 147 : Pos = ( 0.23314, 0.94995)
|
|
||||||
P 148 : Pos = ( 1.37775, 1.09405)
|
|
||||||
P 149 : Pos = ( 1.05285, 0.41031)
|
|
||||||
P 150 : Pos = ( 1.96734, 0.95529)
|
|
||||||
P 151 : Pos = ( 1.93612, 1.86359)
|
|
||||||
P 152 : Pos = ( 0.33626, 0.46954)
|
|
||||||
P 153 : Pos = ( 0.22514, 0.83594)
|
|
||||||
P 154 : Pos = ( -0.39598, 0.54174)
|
|
||||||
P 155 : Pos = ( 0.38510, 0.68808)
|
|
||||||
P 156 : Pos = ( 1.88304, 1.12984)
|
|
||||||
P 157 : Pos = ( 1.51387, 0.50958)
|
|
||||||
P 158 : Pos = ( 0.53522, -0.54578)
|
|
||||||
P 159 : Pos = ( 0.90612, 0.92249)
|
|
||||||
P 160 : Pos = ( 0.53900, 0.82303)
|
|
||||||
P 161 : Pos = ( 1.81221, 1.44585)
|
|
||||||
P 162 : Pos = ( 2.23056, 2.06850)
|
|
||||||
P 163 : Pos = ( 1.78979, 1.38660)
|
|
||||||
P 164 : Pos = ( -0.96834, 1.70461)
|
|
||||||
P 165 : Pos = ( 0.66441, 1.22689)
|
|
||||||
P 166 : Pos = ( 0.54712, 1.41409)
|
|
||||||
P 167 : Pos = ( -0.82349, 0.07197)
|
|
||||||
P 168 : Pos = ( 1.75769, -0.37769)
|
|
||||||
P 169 : Pos = ( 1.47302, 1.06013)
|
|
||||||
P 170 : Pos = ( 1.48806, 1.32693)
|
|
||||||
P 171 : Pos = ( 0.53819, 1.70514)
|
|
||||||
P 172 : Pos = ( 0.48771, 0.93680)
|
|
||||||
P 173 : Pos = ( 1.20460, 0.70910)
|
|
||||||
P 174 : Pos = ( 1.42132, 1.90752)
|
|
||||||
P 175 : Pos = ( 1.66599, -0.44589)
|
|
||||||
P 176 : Pos = ( 1.98787, 0.73926)
|
|
||||||
P 177 : Pos = ( 0.61720, 1.43632)
|
|
||||||
P 178 : Pos = ( 0.09233, 0.40993)
|
|
||||||
P 179 : Pos = ( 0.89637, 1.23228)
|
|
||||||
P 180 : Pos = ( 0.94678, 1.29354)
|
|
||||||
P 181 : Pos = ( 1.56338, 0.93803)
|
|
||||||
P 182 : Pos = ( 1.00555, 2.41949)
|
|
||||||
P 183 : Pos = ( 0.91425, 1.25021)
|
|
||||||
P 184 : Pos = ( 0.45697, 0.79637)
|
|
||||||
P 185 : Pos = ( 0.76393, 0.89831)
|
|
||||||
P 186 : Pos = ( 1.11302, 0.03926)
|
|
||||||
P 187 : Pos = ( -0.91866, -0.03024)
|
|
||||||
P 188 : Pos = ( 3.53310, 0.66295)
|
|
||||||
P 189 : Pos = ( 1.50547, 0.76724)
|
|
||||||
P 190 : Pos = ( 0.26517, 2.07054)
|
|
||||||
P 191 : Pos = ( 0.94994, 1.75728)
|
|
||||||
P 192 : Pos = ( 0.68818, 0.75566)
|
|
||||||
P 193 : Pos = ( 0.20274, 3.62154)
|
|
||||||
P 194 : Pos = ( 0.43678, 0.82206)
|
|
||||||
P 195 : Pos = ( 0.50413, 0.17999)
|
|
||||||
P 196 : Pos = ( 1.74312, 0.19752)
|
|
||||||
P 197 : Pos = ( 1.12824, 1.45088)
|
|
||||||
P 198 : Pos = ( -0.08088, -0.36372)
|
|
||||||
P 199 : Pos = ( -0.77277, 1.86850)
|
|
||||||
P 200 : Pos = ( 1.11899, 0.64833)
|
|
||||||
P 201 : Pos = ( 1.28019, 1.05982)
|
|
||||||
P 202 : Pos = ( 2.11335, 1.43714)
|
|
||||||
P 203 : Pos = ( 1.06307, 0.76771)
|
|
||||||
P 204 : Pos = ( 0.44693, 0.65341)
|
|
||||||
P 205 : Pos = ( 1.62777, 0.67686)
|
|
||||||
P 206 : Pos = ( 1.25216, 1.87090)
|
|
||||||
P 207 : Pos = ( 1.82971, -0.23524)
|
|
||||||
P 208 : Pos = ( -0.19951, 3.85571)
|
|
||||||
P 209 : Pos = ( 0.16027, 0.70100)
|
|
||||||
P 210 : Pos = ( 2.46179, 0.16550)
|
|
||||||
P 211 : Pos = ( 1.43179, 0.51713)
|
|
||||||
P 212 : Pos = ( 0.47614, 0.08310)
|
|
||||||
P 213 : Pos = ( 1.44280, 0.96373)
|
|
||||||
P 214 : Pos = ( 0.97043, -1.71619)
|
|
||||||
P 215 : Pos = ( -2.08440, 3.61353)
|
|
||||||
P 216 : Pos = ( -1.78035, 2.03562)
|
|
||||||
P 217 : Pos = ( 1.45184, 0.58404)
|
|
||||||
P 218 : Pos = ( 0.05683, 1.58038)
|
|
||||||
P 219 : Pos = ( 0.91772, 1.09309)
|
|
||||||
P 220 : Pos = ( 1.10845, 1.36209)
|
|
||||||
P 221 : Pos = ( 1.07276, 1.28446)
|
|
||||||
P 222 : Pos = ( 1.03292, 0.57361)
|
|
||||||
P 223 : Pos = ( 0.60097, 0.83686)
|
|
||||||
P 224 : Pos = ( 1.42667, 1.25810)
|
|
||||||
P 225 : Pos = ( 0.61245, 1.00407)
|
|
||||||
P 226 : Pos = ( 3.85628, 3.54038)
|
|
||||||
P 227 : Pos = ( 1.02262, 1.42898)
|
|
||||||
P 228 : Pos = ( 1.49231, 0.39658)
|
|
||||||
P 229 : Pos = ( 1.15396, 0.44536)
|
|
||||||
P 230 : Pos = ( 2.21192, 1.04546)
|
|
||||||
P 231 : Pos = ( 1.48608, 1.25121)
|
|
||||||
P 232 : Pos = ( 1.18665, 0.43911)
|
|
||||||
P 233 : Pos = ( 0.50953, 0.66889)
|
|
||||||
P 234 : Pos = ( 0.98612, 0.67809)
|
|
||||||
P 235 : Pos = ( -1.12680, 1.97178)
|
|
||||||
P 236 : Pos = ( 0.53246, 1.89456)
|
|
||||||
P 237 : Pos = ( 1.02209, -0.03745)
|
|
||||||
P 238 : Pos = ( 2.92297, 0.51435)
|
|
||||||
P 239 : Pos = ( 0.64931, 0.27714)
|
|
||||||
P 240 : Pos = ( 2.49270, 1.32961)
|
|
||||||
P 241 : Pos = ( 0.36014, 0.95143)
|
|
||||||
P 242 : Pos = ( 1.64475, 0.87212)
|
|
||||||
P 243 : Pos = ( 0.27028, 2.63612)
|
|
||||||
P 244 : Pos = ( 0.15161, 1.65826)
|
|
||||||
P 245 : Pos = ( 0.42086, 1.04793)
|
|
||||||
P 246 : Pos = ( 1.44298, 1.14618)
|
|
||||||
P 247 : Pos = ( 0.26918, 2.12908)
|
|
||||||
P 248 : Pos = ( 1.18388, 0.80868)
|
|
||||||
P 249 : Pos = ( 1.11438, 2.04683)
|
|
||||||
P 250 : Pos = ( 0.82934, 1.14285)
|
|
||||||
P 251 : Pos = ( 1.41437, 1.43891)
|
|
||||||
P 252 : Pos = ( -0.95537, -1.89308)
|
|
||||||
P 253 : Pos = ( 1.65018, 0.70221)
|
|
||||||
P 254 : Pos = ( 1.53063, 0.84752)
|
|
||||||
P 255 : Pos = ( 1.82706, 1.41561)
|
|
|
@ -1,128 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#include "defs.h"
|
|
||||||
#include "memory.h"
|
|
||||||
#include "box.h"
|
|
||||||
#include "partition_grid.h"
|
|
||||||
#include "cost_zones.h"
|
|
||||||
|
|
||||||
/* Number of enumerators in the direction enum below. */
#define NUM_DIRECTIONS 4

typedef enum { RIGHT, LEFT, UP, DOWN } direction;

/* One row per direction (in enum order), NUM_OFFSPRING child indices per row.
   NOTE(review): presumably the order in which CostZonesHelper visits a
   box's children for that direction - confirm against CostZonesHelper. */
static long Child_Sequence[NUM_DIRECTIONS][NUM_OFFSPRING] =
{
   { 0, 1, 2, 3 },
   { 2, 3, 0, 1 },
   { 0, 3, 2, 1 },
   { 2, 1, 0, 3 },
};

/* One row per direction (in enum order), one direction value per entry.
   NOTE(review): presumably the direction handed to each child in the
   matching Child_Sequence row - confirm against CostZonesHelper. */
static long Direction_Sequence[NUM_DIRECTIONS][NUM_OFFSPRING] =
{
   { UP, RIGHT, RIGHT, DOWN },
   { DOWN, LEFT, LEFT, UP },
   { RIGHT, UP, UP, LEFT },
   { LEFT, DOWN, DOWN, RIGHT },
};

/* Forward declarations for the helpers used by CostZones. */
void ComputeSubTreeCosts(long my_id, box *b);
void CostZonesHelper(long my_id, box *b, long work, direction dir);
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
CostZones (long my_id)
|
|
||||||
{
|
|
||||||
PartitionIterate(my_id, ComputeSubTreeCosts, BOTTOM);
|
|
||||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
|
||||||
Local[my_id].Total_Work = Grid->subtree_cost;
|
|
||||||
Local[my_id].Min_Work = ((Local[my_id].Total_Work / Number_Of_Processors)
|
|
||||||
* my_id);
|
|
||||||
if (my_id == (Number_Of_Processors - 1))
|
|
||||||
Local[my_id].Max_Work = Local[my_id].Total_Work;
|
|
||||||
else
|
|
||||||
Local[my_id].Max_Work = (Local[my_id].Min_Work
|
|
||||||
+ (Local[my_id].Total_Work
|
|
||||||
/ Number_Of_Processors));
|
|
||||||
InitPartition(my_id);
|
|
||||||
CostZonesHelper(my_id, Grid, 0, RIGHT);
|
|
||||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
ComputeSubTreeCosts (long my_id, box *b)
|
|
||||||
{
|
|
||||||
box *pb;
|
|
||||||
|
|
||||||
if (b->type == PARENT) {
|
|
||||||
while (b->interaction_synch != b->num_children) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
b->interaction_synch = 0;
|
|
||||||
ComputeCostOfBox(b);
|
|
||||||
b->subtree_cost += b->cost;
|
|
||||||
pb = b->parent;
|
|
||||||
if (pb != NULL) {
|
|
||||||
ALOCK(G_Memory->lock_array, pb->exp_lock_index);
|
|
||||||
pb->subtree_cost += b->subtree_cost;
|
|
||||||
pb->interaction_synch += 1;
|
|
||||||
AULOCK(G_Memory->lock_array, pb->exp_lock_index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
CostZonesHelper (long my_id, box *b, long work, direction dir)
|
|
||||||
{
|
|
||||||
box *cb;
|
|
||||||
long i;
|
|
||||||
long *next_child;
|
|
||||||
long *child_dir;
|
|
||||||
|
|
||||||
if (b->type == CHILDLESS) {
|
|
||||||
if (work >= Local[my_id].Min_Work)
|
|
||||||
InsertBoxInPartition(my_id, b);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
next_child = Child_Sequence[dir];
|
|
||||||
child_dir = Direction_Sequence[dir];
|
|
||||||
for (i = 0; (i < NUM_OFFSPRING) && (work < Local[my_id].Max_Work);
|
|
||||||
i++) {
|
|
||||||
cb = b->children[next_child[i]];
|
|
||||||
if (cb != NULL) {
|
|
||||||
if ((work + cb->subtree_cost) >= Local[my_id].Min_Work)
|
|
||||||
CostZonesHelper(my_id, cb, work, child_dir[i]);
|
|
||||||
work += cb->subtree_cost;
|
|
||||||
}
|
|
||||||
if (i == 2) {
|
|
||||||
if ((work >= Local[my_id].Min_Work)
|
|
||||||
&& (work < Local[my_id].Max_Work))
|
|
||||||
InsertBoxInPartition(my_id, b);
|
|
||||||
work += b->cost;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#undef DOWN
|
|
||||||
#undef UP
|
|
||||||
#undef LEFT
|
|
||||||
#undef RIGHT
|
|
||||||
#undef NUM_DIRECTIONS
|
|
||||||
|
|
|
@ -1,22 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#ifndef _Cost_Zones_H
|
|
||||||
#define _Cost_Zones_H 1
|
|
||||||
|
|
||||||
extern void CostZones(long my_id);
|
|
||||||
|
|
||||||
#endif /* _Cost_Zones_H */
|
|
|
@ -1,81 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdarg.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include "defs.h"
|
|
||||||
#include "memory.h"
|
|
||||||
|
|
||||||
long Number_Of_Processors;
|
|
||||||
double Timestep_Dur;
|
|
||||||
real Softening_Param;
|
|
||||||
long Expansion_Terms;
|
|
||||||
|
|
||||||
|
|
||||||
real
|
|
||||||
RoundReal (real val)
|
|
||||||
{
|
|
||||||
double shifter;
|
|
||||||
double frac;
|
|
||||||
long exp;
|
|
||||||
double shifted_frac;
|
|
||||||
double new_frac;
|
|
||||||
double temp;
|
|
||||||
real ret_val;
|
|
||||||
|
|
||||||
shifter = pow((double) 10, (double) REAL_DIG - 2);
|
|
||||||
frac = frexp((double) val, &exp);
|
|
||||||
shifted_frac = frac * shifter;
|
|
||||||
temp = modf(shifted_frac, &new_frac);
|
|
||||||
new_frac /= shifter;
|
|
||||||
ret_val = (real) ldexp(new_frac, exp);
|
|
||||||
return ret_val;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
PrintComplexNum (complex *c)
|
|
||||||
{
|
|
||||||
if (c->i >= (real) 0.0)
|
|
||||||
printf("%e + %ei", c->r, c->i);
|
|
||||||
else
|
|
||||||
printf("%e - %ei", c->r, -c->i);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
PrintVector (vector *v)
|
|
||||||
{
|
|
||||||
printf("(%10.5f, %10.5f)", v->x, v->y);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
LockedPrint (char *format_str, ...)
|
|
||||||
{
|
|
||||||
va_list ap;
|
|
||||||
|
|
||||||
va_start(ap, format_str);
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
fflush(stdout);
|
|
||||||
vfprintf(stdout, format_str, ap);
|
|
||||||
fflush(stdout);
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
va_end(ap);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -1,175 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#ifndef _Defs_H
|
|
||||||
#define _Defs_H 1
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <limits.h>
|
|
||||||
|
|
||||||
/* Define booleans */
|
|
||||||
#ifdef TRUE
|
|
||||||
#undef TRUE
|
|
||||||
#endif
|
|
||||||
#ifdef FALSE
|
|
||||||
#undef FALSE
|
|
||||||
#endif
|
|
||||||
#define NUM_DIMENSIONS 2
|
|
||||||
#define NUM_DIM_POW_2 4
|
|
||||||
|
|
||||||
#undef DBL_MIN
|
|
||||||
#define DBL_MIN 2.2250738585072014e-308 /* min > 0 val of "double" */
|
|
||||||
|
|
||||||
#define TIME_ALL 1 /* non-0 means time each phase within a time step */
|
|
||||||
#define MY_TIMING (Local[my_id].Timing)
|
|
||||||
#define MY_TIME_STEP (Local[my_id].Time_Step)
|
|
||||||
|
|
||||||
#define MAX_REAL DBL_MAX
|
|
||||||
#define MIN_REAL DBL_MIN
|
|
||||||
#define REAL_DIG __DBL_DIG__
|
|
||||||
|
|
||||||
#define MAX_PROCS 64
|
|
||||||
|
|
||||||
/* Defines the maximum depth of the tree */
|
|
||||||
#define MAX_LEVEL 100
|
|
||||||
#define MAX_TIME_STEPS 10
|
|
||||||
|
|
||||||
/*
 * Arithmetic helpers for `complex` (fields r, i) and `vector` (fields x, y).
 *
 * In the three-argument macros `a` receives the result of `b` op `c`.
 * The statement macros expand to a brace block. Every argument is
 * parenthesized so that expressions such as `*p` or `s + t` can be passed
 * safely. Arguments may be evaluated more than once, so they must not have
 * side effects.
 */
#define COMPLEX_ADD(a,b,c) \
   { \
      (a).r = (b).r + (c).r; \
      (a).i = (b).i + (c).i; \
   }

#define COMPLEX_SUB(a,b,c) \
   { \
      (a).r = (b).r - (c).r; \
      (a).i = (b).i - (c).i; \
   }

/* The temporary lets `a` be the same object as `b` or `c`. */
#define COMPLEX_MUL(a,b,c) \
   { \
      complex _c_temp; \
      \
      _c_temp.r = ((b).r * (c).r) - ((b).i * (c).i); \
      _c_temp.i = ((b).r * (c).i) + ((b).i * (c).r); \
      (a).r = _c_temp.r; \
      (a).i = _c_temp.i; \
   }

/* b / c computed as b * conj(c) / |c|^2; no check for c == 0. */
#define COMPLEX_DIV(a,b,c) \
   { \
      real _denom; \
      complex _c_temp; \
      \
      _denom = ((real) 1.0) / (((c).r * (c).r) + ((c).i * (c).i)); \
      _c_temp.r = (((b).r * (c).r) + ((b).i * (c).i)) * _denom; \
      _c_temp.i = (((b).i * (c).r) - ((b).r * (c).i)) * _denom; \
      (a).r = _c_temp.r; \
      (a).i = _c_temp.i; \
   }

/* Magnitude of a complex number, as a double. */
#define COMPLEX_ABS(a) \
   sqrt((double) (((a).r * (a).r) + ((a).i * (a).i)))

#define VECTOR_ADD(a,b,c) \
   { \
      (a).x = (b).x + (c).x; \
      (a).y = (b).y + (c).y; \
   }

#define VECTOR_SUB(a,b,c) \
   { \
      (a).x = (b).x - (c).x; \
      (a).y = (b).y - (c).y; \
   }

/* Scales vector b by the scalar c. */
#define VECTOR_MUL(a,b,c) \
   { \
      (a).x = (b).x * (c); \
      (a).y = (b).y * (c); \
   }

/* Divides vector b by the scalar c. */
#define VECTOR_DIV(a,b,c) \
   { \
      (a).x = (b).x / (c); \
      (a).y = (b).y / (c); \
   }

#define DOT_PRODUCT(a,b) \
   (((a).x * (b).x) + ((a).y * (b).y))
|
|
||||||
|
|
||||||
/* Per-operation cost constants (presumably relative weights used by the
 * cost model -- confirm against the code that uses them). */
#define ADD_COST 2
#define MUL_COST 5
#define DIV_COST 19
#define ABS_COST 1

/* Cost estimates for the interaction lists and for a box's self
 * interaction. Arguments are parenthesized so that compound expressions
 * (e.g. `n + 1`) are multiplied as a whole. */
#define U_LIST_COST(a,b) (1.06 * 79.2 * (a) * (b))
#define V_LIST_COST(a) (1.08 * ((35.9 * (a) * (a)) + (133.6 * (a))))
#define W_LIST_COST(a,b) (1.11 * 29.2 * (a) * (b))
#define X_LIST_COST(a,b) (1.15 * 56.0 * (a) * (b))
#define SELF_COST(a) (7.0 * 61.4 * (a) * (a))
|
|
||||||
|
|
||||||
/* SWOO: Did I put this here? If so, you don't need it */
|
|
||||||
#define CACHE_SIZE 16 /* should be in bytes */
|
|
||||||
|
|
||||||
#define PAGE_SIZE 4096
|
|
||||||
#define PAD_SIZE (PAGE_SIZE / (sizeof(long)))
|
|
||||||
|
|
||||||
typedef enum { FALSE = 0, TRUE = 1 } bool;
|
|
||||||
|
|
||||||
/* This definition sets the precision of the calculations. To use single
|
|
||||||
* precision, simply change double to float and recompile! */
|
|
||||||
/* Floating-point type used for all calculations. */
typedef double real;

/* Complex number: real part r, imaginary part i. */
typedef struct __Complex {
   real r;
   real i;
} complex;

/* Two-dimensional vector. */
typedef struct _Vector {
   real x;
   real y;
} vector;

/* Timing record for one processor and one time step; all values are
 * differences between CLOCK() readings. */
typedef struct _Time_Info {
   unsigned long construct_time;   /* tree construction */
   unsigned long list_time;        /* list construction */
   unsigned long partition_time;   /* PartitionGrid */
   unsigned long inter_time;       /* ComputeInteractions pass */
   unsigned long pass_time;        /* upward plus downward tree pass */
   unsigned long intra_time;       /* ComputeParticlePositions pass */
   unsigned long barrier_time;     /* wait at the barrier in StepSimulation */
   unsigned long other_time;       /* reported as "Other Time" */
   unsigned long total_time;       /* whole time step */
} time_info;
|
|
||||||
|
|
||||||
extern long Number_Of_Processors;
|
|
||||||
extern double Timestep_Dur;
|
|
||||||
extern real Softening_Param;
|
|
||||||
extern long Expansion_Terms;
|
|
||||||
|
|
||||||
extern real RoundReal(real val);
|
|
||||||
extern void PrintComplexNum(complex *c);
|
|
||||||
extern void PrintVector(vector *v);
|
|
||||||
extern void LockedPrint(char *format, ...);
|
|
||||||
|
|
||||||
#endif /* _Defs_H */
|
|
|
@ -1,615 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* FMM.C
|
|
||||||
*
|
|
||||||
* This file contains the entry to Greengard's adaptive algorithm.
|
|
||||||
*
|
|
||||||
|
|
||||||
Usage: FMM <options> < inputfile
|
|
||||||
|
|
||||||
Command line options:
|
|
||||||
|
|
||||||
-o : Print out final particle positions.
|
|
||||||
-s : Print out individual processor timing statistics.
|
|
||||||
-h : Print out command line options
|
|
||||||
|
|
||||||
Input file parameter description:
|
|
||||||
There are a total of nine parameters, with parameters
|
|
||||||
three through seven having no default values.
|
|
||||||
|
|
||||||
1) Cluster Type : Particles are distributed either in one cluster,
|
|
||||||
or two interacting clusters of size (# of particles)/ 2.
|
|
||||||
These two options are selected by the strings "one cluster" or
|
|
||||||
"two cluster". The default is for two clusters.
|
|
||||||
2) Distribution Type : Particles are distributed in a cluster
|
|
||||||
either in a spherical uniform distribution, or according to
|
|
||||||
the Plummer model which typically has a large percentage of the
|
|
||||||
particles close to the center of the sphere and fewer particles
|
|
||||||
farther from the center. These two options are selected by
|
|
||||||
the strings "uniform" or "plummer". The default is for a
|
|
||||||
plummer distribution.
|
|
||||||
3) Number Of Particles : Should be an integer greater than 0.
|
|
||||||
4) Precision : A measure of how accurate the calculation should be.
|
|
||||||
A precision of 1e-3 means that the results will be accurate to
|
|
||||||
within three decimal places regardless of the relative magnitude
|
|
||||||
of the positions. The precision should be a real number greater
|
|
||||||
than 0.
|
|
||||||
5) Number of Processors : Should be an integer greater than 0.
|
|
||||||
6) Number of Time Steps : Should be an integer greater than 0.
|
|
||||||
7) Duration of a Time Step : How long each time step lasts.
|
|
||||||
Should be a double greater than 0.
|
|
||||||
8) Softening Parameter : This value sets the minimum distance in
|
|
||||||
each direction that two particles can be separated by. If two
|
|
||||||
particles are closer than this, the distance used for the
|
|
||||||
calculation is changed to the softening parameter. The particle
|
|
||||||
positions themselves are NOT changed. This number should be a
|
|
||||||
real number greater than 0 and defaults to DBL_MIN or FLT_MIN,
|
|
||||||
depending on what type of data is being used.
|
|
||||||
9) Partitioning Scheme : Sets which type of partitioning scheme
|
|
||||||
is used. There are currently two : "cost zones" and "orb".
|
|
||||||
The default is cost zones.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include "defs.h"
|
|
||||||
#include "memory.h"
|
|
||||||
#include "particle.h"
|
|
||||||
#include "box.h"
|
|
||||||
#include "partition_grid.h"
|
|
||||||
#include "cost_zones.h"
|
|
||||||
#include "construct_grid.h"
|
|
||||||
#include "interactions.h"
|
|
||||||
|
|
||||||
#define BASE ((((double) 4) - sqrt((double) 2)) / sqrt((double) 2))
|
|
||||||
#define MAX_LINE_SIZE 100
|
|
||||||
/* OCCUPANCY * maximum particles per box = avg number of particles per box */
|
|
||||||
#define OCCUPANCY ((MAX_PARTICLES_PER_BOX > 5) ? .375 : .750)
|
|
||||||
/* Some processors will be given more than the average number of particles.
|
|
||||||
* PDF (Particle Distribution Factor) is the ratio of the maximum to the avg */
|
|
||||||
#define PDF 4.0
|
|
||||||
/* A nonuniform distribution will require more boxes than a uniform
|
|
||||||
* distribution of the same size. TOLERANCE is used to account for this */
|
|
||||||
#define TOLERANCE 1.5
|
|
||||||
/* Same as PDF, but for boxes */
|
|
||||||
/* define BDF (((Total_Particles/Number_Of_Processors) > 128) ? 2.0 : 3.0)*/
|
|
||||||
#define BDF (((Total_Particles/Number_Of_Processors) > 128) ? 4.0 : 8.0)
|
|
||||||
|
|
||||||
static partition_alg Partition_Flag;
|
|
||||||
static real Precision;
|
|
||||||
static long Time_Steps;
|
|
||||||
static cluster_type Cluster;
|
|
||||||
static model_type Model;
|
|
||||||
long do_stats = 0;
|
|
||||||
long do_output = 0;
|
|
||||||
unsigned long starttime;
|
|
||||||
unsigned long endtime;
|
|
||||||
|
|
||||||
void ParallelExecute(void);
|
|
||||||
void StepSimulation(long my_id, time_info *local_time, long time_all);
|
|
||||||
void PartitionGrid(long my_id, time_info *local_time, long time_all);
|
|
||||||
void GetArguments(void);
|
|
||||||
void PrintTimes(void);
|
|
||||||
void Help(void);
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
main (int argc, char *argv[])
|
|
||||||
{
|
|
||||||
long c;
|
|
||||||
extern char *optarg;
|
|
||||||
|
|
||||||
CLOCK(starttime);
|
|
||||||
|
|
||||||
while ((c = getopt(argc, argv, "osh")) != -1) {
|
|
||||||
switch(c) {
|
|
||||||
case 'o': do_output = 1; break;
|
|
||||||
case 's': do_stats = 1; break;
|
|
||||||
case 'h': Help(); break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MAIN_INITENV(,40000000);
|
|
||||||
|
|
||||||
GetArguments();
|
|
||||||
InitGlobalMemory();
|
|
||||||
InitExpTables();
|
|
||||||
CreateDistribution(Cluster, Model);
|
|
||||||
|
|
||||||
/* for (i = 1; i < Number_Of_Processors; i++) {
|
|
||||||
CREATE(ParallelExecute);
|
|
||||||
}
|
|
||||||
ParallelExecute();
|
|
||||||
WAIT_FOR_END(Number_Of_Processors - 1);*/
|
|
||||||
CREATE(ParallelExecute, Number_Of_Processors);
|
|
||||||
WAIT_FOR_END(Number_Of_Processors);
|
|
||||||
|
|
||||||
printf("Finished FMM\n");
|
|
||||||
PrintTimes();
|
|
||||||
if (do_output) {
|
|
||||||
PrintAllParticles();
|
|
||||||
}
|
|
||||||
MAIN_END;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
ParallelExecute ()
|
|
||||||
{
|
|
||||||
long my_id;
|
|
||||||
long num_boxes;
|
|
||||||
unsigned long start, finish = 0;
|
|
||||||
time_info *local_time;
|
|
||||||
long time_all = 0;
|
|
||||||
time_info *timing;
|
|
||||||
unsigned long local_init_done = 0;
|
|
||||||
|
|
||||||
BARINCLUDE(G_Memory->synch);
|
|
||||||
local_time = (time_info *) malloc(sizeof(struct _Time_Info) * MAX_TIME_STEPS);
|
|
||||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
|
||||||
LOCK(G_Memory->count_lock);
|
|
||||||
my_id = G_Memory->id;
|
|
||||||
G_Memory->id++;
|
|
||||||
UNLOCK(G_Memory->count_lock);
|
|
||||||
|
|
||||||
/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
|
|
||||||
processors to avoid migration */
|
|
||||||
|
|
||||||
if (my_id == 0) {
|
|
||||||
time_all = 1;
|
|
||||||
} else if (do_stats) {
|
|
||||||
time_all = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (my_id == 0) {
|
|
||||||
/* have to allocate extra space since it will construct the grid by
|
|
||||||
* itself for the first time step */
|
|
||||||
CreateParticleList(my_id, Total_Particles);
|
|
||||||
InitParticleList(my_id, Total_Particles, 0);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
CreateParticleList(my_id, ((Total_Particles * PDF)
|
|
||||||
/ Number_Of_Processors));
|
|
||||||
InitParticleList(my_id, 0, 0);
|
|
||||||
}
|
|
||||||
num_boxes = 1.333 * (Total_Particles / (OCCUPANCY * MAX_PARTICLES_PER_BOX));
|
|
||||||
if (my_id == 0)
|
|
||||||
CreateBoxes(my_id, TOLERANCE * num_boxes);
|
|
||||||
else
|
|
||||||
CreateBoxes(my_id, TOLERANCE * num_boxes * BDF / Number_Of_Processors);
|
|
||||||
|
|
||||||
if (my_id == 0) {
|
|
||||||
LockedPrint("Starting FMM with %d processor%s\n", Number_Of_Processors,
|
|
||||||
(Number_Of_Processors == 1) ? "" : "s");
|
|
||||||
}
|
|
||||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
|
||||||
Local[my_id].Time = 0.0;
|
|
||||||
for (MY_TIME_STEP = 0; MY_TIME_STEP < Time_Steps; MY_TIME_STEP++) {
|
|
||||||
|
|
||||||
if (MY_TIME_STEP == 2) {
|
|
||||||
/* POSSIBLE ENHANCEMENT: Here is where one might reset the
|
|
||||||
statistics that one is measuring about the parallel execution */
|
|
||||||
}
|
|
||||||
|
|
||||||
if (MY_TIME_STEP == 2) {
|
|
||||||
if (do_stats || my_id == 0) {
|
|
||||||
CLOCK(local_init_done);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (MY_TIME_STEP == 0) {
|
|
||||||
CLOCK(start);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
start = finish;
|
|
||||||
ConstructGrid(my_id,local_time,time_all);
|
|
||||||
ConstructLists(my_id,local_time,time_all);
|
|
||||||
PartitionGrid(my_id,local_time,time_all);
|
|
||||||
StepSimulation(my_id,local_time,time_all);
|
|
||||||
DestroyGrid(my_id,local_time,time_all);
|
|
||||||
CLOCK(finish);
|
|
||||||
Local[my_id].Time += Timestep_Dur;
|
|
||||||
MY_TIMING[MY_TIME_STEP].total_time = finish - start;
|
|
||||||
}
|
|
||||||
if (my_id == 0) {
|
|
||||||
CLOCK(endtime);
|
|
||||||
}
|
|
||||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
|
||||||
for (MY_TIME_STEP = 0; MY_TIME_STEP < Time_Steps; MY_TIME_STEP++) {
|
|
||||||
timing = &(MY_TIMING[MY_TIME_STEP]);
|
|
||||||
timing->other_time = local_time[MY_TIME_STEP].other_time;
|
|
||||||
timing->construct_time = local_time[MY_TIME_STEP].construct_time;
|
|
||||||
timing->list_time = local_time[MY_TIME_STEP].list_time;
|
|
||||||
timing->partition_time = local_time[MY_TIME_STEP].partition_time;
|
|
||||||
timing->pass_time = local_time[MY_TIME_STEP].pass_time;
|
|
||||||
timing->inter_time = local_time[MY_TIME_STEP].inter_time;
|
|
||||||
timing->barrier_time = local_time[MY_TIME_STEP].barrier_time;
|
|
||||||
timing->intra_time = local_time[MY_TIME_STEP].intra_time;
|
|
||||||
}
|
|
||||||
Local[my_id].init_done_times = local_init_done;
|
|
||||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
PartitionGrid (long my_id, time_info *local_time, long time_all)
|
|
||||||
{
|
|
||||||
unsigned long start = 0, finish;
|
|
||||||
|
|
||||||
if (time_all)
|
|
||||||
CLOCK(start);
|
|
||||||
if (Partition_Flag == COST_ZONES)
|
|
||||||
CostZones(my_id);
|
|
||||||
if (time_all) {
|
|
||||||
CLOCK(finish);
|
|
||||||
local_time[MY_TIME_STEP].partition_time = finish - start;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
StepSimulation (long my_id, time_info *local_time, long time_all)
|
|
||||||
{
|
|
||||||
unsigned long start, finish;
|
|
||||||
unsigned long upward_end, interaction_end, downward_end, barrier_end;
|
|
||||||
|
|
||||||
if (time_all)
|
|
||||||
CLOCK(start);
|
|
||||||
PartitionIterate(my_id, UpwardPass, BOTTOM);
|
|
||||||
if (time_all)
|
|
||||||
CLOCK(upward_end);
|
|
||||||
PartitionIterate(my_id, ComputeInteractions, BOTTOM);
|
|
||||||
if (time_all)
|
|
||||||
CLOCK(interaction_end);
|
|
||||||
BARRIER(G_Memory->synch, Number_Of_Processors);
|
|
||||||
if (time_all)
|
|
||||||
CLOCK(barrier_end);
|
|
||||||
PartitionIterate(my_id, DownwardPass, TOP);
|
|
||||||
if (time_all)
|
|
||||||
CLOCK(downward_end);
|
|
||||||
PartitionIterate(my_id, ComputeParticlePositions, CHILDREN);
|
|
||||||
if (time_all)
|
|
||||||
CLOCK(finish);
|
|
||||||
|
|
||||||
if (time_all) {
|
|
||||||
local_time[MY_TIME_STEP].pass_time = upward_end - start;
|
|
||||||
local_time[MY_TIME_STEP].inter_time = interaction_end - upward_end;
|
|
||||||
local_time[MY_TIME_STEP].barrier_time = barrier_end - interaction_end;
|
|
||||||
local_time[MY_TIME_STEP].pass_time += downward_end - barrier_end;
|
|
||||||
local_time[MY_TIME_STEP].intra_time = finish - downward_end;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
GetArguments ()
|
|
||||||
{
|
|
||||||
char *input;
|
|
||||||
|
|
||||||
input = (char *) malloc(MAX_LINE_SIZE * sizeof(char));
|
|
||||||
if (input == NULL) {
|
|
||||||
fprintf(stderr, "ERROR\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
gets(input);
|
|
||||||
if (strcmp(input, "one cluster") == 0)
|
|
||||||
Cluster = ONE_CLUSTER;
|
|
||||||
else {
|
|
||||||
if ((*input == '\0') || (strcmp(input, "two cluster") == 0))
|
|
||||||
Cluster = TWO_CLUSTER;
|
|
||||||
else {
|
|
||||||
fprintf(stderr, "ERROR: The only cluster types available are ");
|
|
||||||
fprintf(stderr, "\"one cluster\" or \"two cluster\".\n");
|
|
||||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
gets(input);
|
|
||||||
if (strcmp(input, "uniform") == 0)
|
|
||||||
Model = UNIFORM;
|
|
||||||
else {
|
|
||||||
if ((*input == '\0') || (strcmp(input, "plummer") == 0))
|
|
||||||
Model = PLUMMER;
|
|
||||||
else {
|
|
||||||
fprintf(stderr, "ERROR: The only distributions available are ");
|
|
||||||
fprintf(stderr, "\"uniform\" or \"plummer\".\n");
|
|
||||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Total_Particles = atoi(gets(input));
|
|
||||||
if (Total_Particles <= 0) {
|
|
||||||
fprintf(stderr, "ERROR: The number of particles should be an int ");
|
|
||||||
fprintf(stderr, "greater than 0.\n");
|
|
||||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
Precision = atof(gets(input));
|
|
||||||
if (Precision == 0.0) {
|
|
||||||
fprintf(stderr, "ERROR: The precision has no default value.\n");
|
|
||||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
/* Determine number of multipole expansion terms needed for specified
|
|
||||||
* precision and flag an error if it is too precise */
|
|
||||||
Expansion_Terms = (long) ceil(-(log(Precision) / log(BASE)));
|
|
||||||
if (Expansion_Terms > MAX_EXPANSION_TERMS) {
|
|
||||||
fprintf(stderr, "ERROR: %g (%ld terms) is too great a precision.\n", Precision, Expansion_Terms);
|
|
||||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
Number_Of_Processors = atoi(gets(input));
|
|
||||||
if (Number_Of_Processors == 0) {
|
|
||||||
fprintf(stderr, "ERROR: The Number_Of_Processors has no default.\n");
|
|
||||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
if (Number_Of_Processors < 0) {
|
|
||||||
fprintf(stderr, "ERROR: Number of processors should be an int greater ");
|
|
||||||
fprintf(stderr, "than 0.\n");
|
|
||||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
Time_Steps = atoi(gets(input));
|
|
||||||
if (Time_Steps == 0) {
|
|
||||||
fprintf(stderr, "ERROR: The number of time steps has no default.\n");
|
|
||||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
if (Time_Steps < 0) {
|
|
||||||
fprintf(stderr, "ERROR: The number of time steps should be an int ");
|
|
||||||
fprintf(stderr, "greater than 0.\n");
|
|
||||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
Timestep_Dur = atof(gets(input));
|
|
||||||
if (Timestep_Dur == 0.0) {
|
|
||||||
fprintf(stderr, "ERROR: The duration of a time step has no default ");
|
|
||||||
fprintf(stderr, "value.\n If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
if (Timestep_Dur < 0) {
|
|
||||||
fprintf(stderr, "ERROR: The duration of a time step should be a ");
|
|
||||||
fprintf(stderr, "double greater than 0.\n");
|
|
||||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
Softening_Param = atof(gets(input));
|
|
||||||
if (Softening_Param == 0.0)
|
|
||||||
Softening_Param = MIN_REAL;
|
|
||||||
if (Softening_Param < 0) {
|
|
||||||
fprintf(stderr, "ERROR: The softening parameter should be a double ");
|
|
||||||
fprintf(stderr, "greater than 0.\n");
|
|
||||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
gets(input);
|
|
||||||
if ((*input == '\0') || (strcmp(input, "cost zones") == 0))
|
|
||||||
Partition_Flag = COST_ZONES;
|
|
||||||
else {
|
|
||||||
if (strcmp(input, "orb") == 0)
|
|
||||||
Partition_Flag = ORB;
|
|
||||||
else {
|
|
||||||
fprintf(stderr, "ERROR: The only partitioning schemes available ");
|
|
||||||
fprintf(stderr, "are \"cost zones\" \n\t or \"orb\".\n");
|
|
||||||
fprintf(stderr, "If you need help, type \"nbody -help\".\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
PrintTimes ()
|
|
||||||
{
|
|
||||||
long i, j;
|
|
||||||
time_info *timing;
|
|
||||||
FILE *fp;
|
|
||||||
double t_total_time = 0;
|
|
||||||
double t_tree_time = 0;
|
|
||||||
double t_list_time = 0;
|
|
||||||
double t_part_time = 0;
|
|
||||||
double t_pass_time = 0;
|
|
||||||
double t_inter_time = 0;
|
|
||||||
double t_bar_time = 0;
|
|
||||||
double t_intra_time = 0;
|
|
||||||
double t_other_time = 0;
|
|
||||||
double total_time;
|
|
||||||
double tree_time;
|
|
||||||
double list_time;
|
|
||||||
double part_time;
|
|
||||||
double pass_time;
|
|
||||||
double inter_time;
|
|
||||||
double bar_time;
|
|
||||||
double intra_time;
|
|
||||||
double other_time;
|
|
||||||
double overall_total = 0;
|
|
||||||
long P;
|
|
||||||
long init_done;
|
|
||||||
|
|
||||||
if ((fp = fopen("times", "w")) == NULL) {
|
|
||||||
fprintf(stderr, "Error opening output file\n");
|
|
||||||
fflush(stderr);
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
fprintf(fp, "TIMING:\n");
|
|
||||||
fprintf(fp, "%ld\t%ld\t%.2e\t%ld\n", Number_Of_Processors, Total_Particles, Precision, Time_Steps);
|
|
||||||
for (i = 0; i < Time_Steps; i++) {
|
|
||||||
fprintf(fp, "Time Step %ld\n", i);
|
|
||||||
for (j = 0; j < Number_Of_Processors; j++) {
|
|
||||||
timing = &(Local[j].Timing[i]);
|
|
||||||
fprintf(fp, "Processor %ld\n", j);
|
|
||||||
fprintf(fp, "\tTotal Time = %lu\n", timing->total_time);
|
|
||||||
if (do_stats) {
|
|
||||||
fprintf(fp, "\tTree Construction Time = %lu\n",
|
|
||||||
timing->construct_time);
|
|
||||||
fprintf(fp, "\tList Construction Time = %lu\n", timing->list_time);
|
|
||||||
fprintf(fp, "\tPartition Time = %lu\n", timing->partition_time);
|
|
||||||
fprintf(fp, "\tTree Pass Time = %lu\n", timing->pass_time);
|
|
||||||
fprintf(fp, "\tInter Particle Time = %lu\n", timing->inter_time);
|
|
||||||
fprintf(fp, "\tBarrier Time = %lu\n", timing->barrier_time);
|
|
||||||
fprintf(fp, "\tIntra Particle Time = %lu\n", timing->intra_time);
|
|
||||||
fprintf(fp, "\tOther Time = %lu\n", timing->other_time);
|
|
||||||
}
|
|
||||||
fflush(fp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fprintf(fp, "END\n");
|
|
||||||
fclose(fp);
|
|
||||||
|
|
||||||
printf(" PROCESS STATISTICS\n");
|
|
||||||
printf(" Track Tree List Part Pass Inter Bar Intra Other\n");
|
|
||||||
printf(" Proc Time Time Time Time Time Time Time Time Time\n");
|
|
||||||
total_time = tree_time = list_time = part_time = pass_time =
|
|
||||||
inter_time = bar_time = intra_time = other_time = 0;
|
|
||||||
for (i = 2; i < Time_Steps; i++) {
|
|
||||||
timing = &(Local[0].Timing[i]);
|
|
||||||
total_time += timing->total_time;
|
|
||||||
tree_time += timing->construct_time;
|
|
||||||
list_time += timing->list_time;
|
|
||||||
part_time += timing->partition_time;
|
|
||||||
pass_time += timing->pass_time;
|
|
||||||
inter_time += timing->inter_time;
|
|
||||||
bar_time += timing->barrier_time;
|
|
||||||
intra_time += timing->intra_time;
|
|
||||||
other_time += timing->other_time;
|
|
||||||
}
|
|
||||||
printf(" %4d %12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f\n",
|
|
||||||
0,total_time,tree_time,list_time,part_time,pass_time,
|
|
||||||
inter_time,bar_time,intra_time,other_time);
|
|
||||||
t_total_time += total_time;
|
|
||||||
t_tree_time += tree_time;
|
|
||||||
t_list_time += list_time;
|
|
||||||
t_part_time += part_time;
|
|
||||||
t_pass_time += pass_time;
|
|
||||||
t_inter_time += inter_time;
|
|
||||||
t_bar_time += bar_time;
|
|
||||||
t_intra_time += intra_time;
|
|
||||||
t_other_time += other_time;
|
|
||||||
if (total_time > overall_total) {
|
|
||||||
overall_total = total_time;
|
|
||||||
}
|
|
||||||
for (j = 1; j < Number_Of_Processors; j++) {
|
|
||||||
total_time = tree_time = list_time = part_time = pass_time =
|
|
||||||
inter_time = bar_time = intra_time = other_time = 0;
|
|
||||||
for (i = 2; i < Time_Steps; i++) {
|
|
||||||
timing = &(Local[j].Timing[i]);
|
|
||||||
total_time += timing->total_time;
|
|
||||||
tree_time += timing->construct_time;
|
|
||||||
list_time += timing->list_time;
|
|
||||||
part_time += timing->partition_time;
|
|
||||||
pass_time += timing->pass_time;
|
|
||||||
inter_time += timing->inter_time;
|
|
||||||
bar_time += timing->barrier_time;
|
|
||||||
intra_time += timing->intra_time;
|
|
||||||
other_time += timing->other_time;
|
|
||||||
}
|
|
||||||
if (do_stats) {
|
|
||||||
printf(" %4ld %12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f\n",
|
|
||||||
j,total_time,tree_time,list_time,part_time,pass_time,
|
|
||||||
inter_time,bar_time,intra_time,other_time);
|
|
||||||
}
|
|
||||||
t_total_time += total_time;
|
|
||||||
t_tree_time += tree_time;
|
|
||||||
t_list_time += list_time;
|
|
||||||
t_part_time += part_time;
|
|
||||||
t_pass_time += pass_time;
|
|
||||||
t_inter_time += inter_time;
|
|
||||||
t_bar_time += bar_time;
|
|
||||||
t_intra_time += intra_time;
|
|
||||||
t_other_time += other_time;
|
|
||||||
if (total_time > overall_total) {
|
|
||||||
overall_total = total_time;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (do_stats) {
|
|
||||||
P = Number_Of_Processors;
|
|
||||||
printf(" Avg %12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f%12.0f\n",
|
|
||||||
t_total_time/P,t_tree_time/P,t_list_time/P,t_part_time/P,
|
|
||||||
t_pass_time/P,t_inter_time/P,t_bar_time/P,t_intra_time/P,
|
|
||||||
t_other_time/P);
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
if (Time_Steps > 2) {
|
|
||||||
init_done = Local[0].init_done_times;
|
|
||||||
if (do_stats) {
|
|
||||||
for (j = 1; j < Number_Of_Processors; j++) {
|
|
||||||
if (Local[j].init_done_times > init_done) {
|
|
||||||
init_done = Local[j].init_done_times;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
printf(" TIMING INFORMATION\n");
|
|
||||||
printf("Start time : %16lu\n", starttime);
|
|
||||||
printf("Initialization finish time : %16lu\n", init_done);
|
|
||||||
printf("Overall finish time : %16lu\n", endtime);
|
|
||||||
printf("Total time with initialization : %16lu\n", endtime - starttime);
|
|
||||||
printf("Total time without initialization : %16lu\n", (long) (overall_total));
|
|
||||||
printf("\n");
|
|
||||||
|
|
||||||
printf("Total time for steps %ld to %ld : %12.0f\n", 3L, Time_Steps, overall_total);
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * Help ()
 *
 * Args : none.
 *
 * Returns : does not return; calls exit(0) after printing.
 *
 * Side Effects : Prints the command line options and a description of the
 *    nine input parameters to stdout.
 *
 */
void
Help ()
{
   printf("Usage: FMM <options> < inputfile\n\n");
   printf("options:\n");
   printf(" -o : Print out final particle positions.\n");
   printf(" -s : Print out individual processor timing statistics.\n");
   printf(" -h : Print out command line options\n");
   printf("\n");
   printf("Input parameter descriptions:\n");
   /* Parameters 1, 2, 8 and 9 below document a default; 3 to 7 do not. */
   printf(" There are nine parameters, and parameters three through\n");
   printf(" seven have no default values.\n");
   printf("1) Cluster Type : Distribute particles in one cluster\n");

   printf(" (\"one cluster\") or two interacting clusters (\"two cluster\")\n");
   printf(" Default is two cluster.\n");
   printf("2) Distribution Type : Distribute particles in either a\n");
   printf(" uniform spherical distribution (\"uniform\"), or in a\n");
   printf(" Plummer model (\"plummer\"). Default is plummer.\n");
   printf("3) Number Of Particles : Integer greater than 0.\n");
   printf("4) Precision : Precision of results. Should be a double.\n");
   printf("5) Number of Processors : Integer greater than 0.\n");
   printf("6) Number of Time Steps : Integer greater than 0.\n");
   printf("7) Time Step Duration : Double greater than 0.\n");
   printf("8) Softening Parameter : Real number greater than 0.\n");
   printf(" Default is DBL_MIN or FLT_MIN.\n");
   printf("9) Partitioning Scheme : \"cost zones\" or \"orb\".\n");
   printf(" Default is cost zones.\n");
   exit(0);
}
|
|
||||||
|
|
||||||
#undef MAX_LINE_SIZE
|
|
||||||
#undef BASE
|
|
|
@ -1,9 +0,0 @@
|
||||||
two cluster
|
|
||||||
plummer
|
|
||||||
16384
|
|
||||||
1e-6
|
|
||||||
1
|
|
||||||
5
|
|
||||||
.025
|
|
||||||
0.0
|
|
||||||
cost zones
|
|
|
@ -1,9 +0,0 @@
|
||||||
two cluster
|
|
||||||
plummer
|
|
||||||
2048
|
|
||||||
1e-6
|
|
||||||
1
|
|
||||||
5
|
|
||||||
.025
|
|
||||||
0.0
|
|
||||||
cost zones
|
|
|
@ -1,9 +0,0 @@
|
||||||
two cluster
|
|
||||||
plummer
|
|
||||||
256
|
|
||||||
1e-6
|
|
||||||
1
|
|
||||||
5
|
|
||||||
.025
|
|
||||||
0.0
|
|
||||||
cost zones
|
|
|
@ -1,664 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include "defs.h"
|
|
||||||
#include "memory.h"
|
|
||||||
#include "particle.h"
|
|
||||||
#include "box.h"
|
|
||||||
#include "partition_grid.h"
|
|
||||||
#include "interactions.h"
|
|
||||||
|
|
||||||
static real Inv[MAX_EXPANSION_TERMS + 1];
|
|
||||||
static real OverInc[MAX_EXPANSION_TERMS + 1];
|
|
||||||
static real C[2 * MAX_EXPANSION_TERMS][2 * MAX_EXPANSION_TERMS];
|
|
||||||
static complex One;
|
|
||||||
static complex Zero;
|
|
||||||
|
|
||||||
void InitExp(box *b);
|
|
||||||
void ComputeMPExp(box *b);
|
|
||||||
void ShiftMPExp(box *cb, box *pb);
|
|
||||||
void UListInteraction(long my_id, box *b1, box *b2);
|
|
||||||
void VListInteraction(long my_id, box *source_box, box *dest_box);
|
|
||||||
void WAndXListInteractions(long my_id, box *b1, box *b2);
|
|
||||||
void WListInteraction(box *source_box, box *dest_box);
|
|
||||||
void XListInteraction(box *source_box, box *dest_box);
|
|
||||||
void ComputeSelfInteraction(box *b);
|
|
||||||
void ShiftLocalExp(box *pb, box *cb);
|
|
||||||
void EvaluateLocalExp(box *b);
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
InitExpTables ()
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
for (i = 1; i < MAX_EXPANSION_TERMS + 1; i++) {
|
|
||||||
Inv[i] = ((real) 1) / (real) i;
|
|
||||||
OverInc[i] = ((real) i) / ((real) i + (real) 1);
|
|
||||||
}
|
|
||||||
C[0][0] = (real) 1.0;
|
|
||||||
for (i = 1; i < (2 * MAX_EXPANSION_TERMS); i++) {
|
|
||||||
C[i][0] = (real) 1.0;
|
|
||||||
C[i][1] = (real) i;
|
|
||||||
C[i - 1][i] = (real) 0.0;
|
|
||||||
for (j = 2; j <= i; j++)
|
|
||||||
C[i][j] = C[i - 1][j] + C[i - 1][j - 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
One.r = (real) 1.0;
|
|
||||||
One.i = (real) 0.0;
|
|
||||||
Zero.r = (real) 0.0;
|
|
||||||
Zero.i = (real) 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
PrintExpTables ()
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
printf("Table for the functions f(i) = 1 / i and g(i) = i / (i + 1)\n");
|
|
||||||
printf("i\t\tf(i)\t\tg(i)\t\t\n");
|
|
||||||
for (i = 1; i < MAX_EXPANSION_TERMS; i++)
|
|
||||||
printf("%ld\t\t%e\t%f\t\n", i, Inv[i], OverInc[i]);
|
|
||||||
printf("\n\nTable for the function h(i,j) = i choose j\n");
|
|
||||||
printf("i\tj\th(i,j)\n");
|
|
||||||
for (i = 0; i < (2 * MAX_EXPANSION_TERMS); i++) {
|
|
||||||
for (j = 0; j <= i; j++)
|
|
||||||
printf("%ld\t%ld\t%g\n", i, j, C[i][j]);
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
UpwardPass (long my_id, box *b)
|
|
||||||
{
|
|
||||||
InitExp(b);
|
|
||||||
if (b->type == CHILDLESS) {
|
|
||||||
ComputeMPExp(b);
|
|
||||||
ALOCK(G_Memory->lock_array, b->exp_lock_index);
|
|
||||||
b->interaction_synch = 1;
|
|
||||||
AULOCK(G_Memory->lock_array, b->exp_lock_index);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
while (b->interaction_synch != b->num_children) {
|
|
||||||
/* wait */;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (b->parent != NULL) {
|
|
||||||
ShiftMPExp(b, b->parent);
|
|
||||||
ALOCK(G_Memory->lock_array, b->parent->exp_lock_index);
|
|
||||||
b->parent->interaction_synch += 1;
|
|
||||||
AULOCK(G_Memory->lock_array, b->parent->exp_lock_index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
ComputeInteractions (long my_id, box *b)
|
|
||||||
{
|
|
||||||
b->cost = 0;
|
|
||||||
if (b->type == CHILDLESS) {
|
|
||||||
ComputeSelfInteraction(b);
|
|
||||||
ListIterate(my_id, b, b->u_list, b->num_u_list, UListInteraction);
|
|
||||||
ListIterate(my_id, b, b->w_list, b->num_w_list, WAndXListInteractions);
|
|
||||||
}
|
|
||||||
ListIterate(my_id, b, b->v_list, b->num_v_list, VListInteraction);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
DownwardPass (long my_id, box *b)
|
|
||||||
{
|
|
||||||
if (b->parent != NULL) {
|
|
||||||
while (b->parent->interaction_synch != 0) {
|
|
||||||
/* wait */;
|
|
||||||
}
|
|
||||||
ShiftLocalExp(b->parent, b);
|
|
||||||
}
|
|
||||||
if (b->type == CHILDLESS) {
|
|
||||||
EvaluateLocalExp(b);
|
|
||||||
b->interaction_synch = 0;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
ALOCK(G_Memory->lock_array, b->exp_lock_index);
|
|
||||||
b->interaction_synch = 0;
|
|
||||||
AULOCK(G_Memory->lock_array, b->exp_lock_index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
ComputeParticlePositions (long my_id, box *b)
|
|
||||||
{
|
|
||||||
particle *p;
|
|
||||||
vector force;
|
|
||||||
vector new_acc;
|
|
||||||
vector delta_acc;
|
|
||||||
vector delta_vel;
|
|
||||||
vector avg_vel;
|
|
||||||
vector delta_pos;
|
|
||||||
long i;
|
|
||||||
|
|
||||||
for (i = 0; i < b->num_particles; i++) {
|
|
||||||
p = b->particles[i];
|
|
||||||
force.x = p->field.r * p->charge;
|
|
||||||
force.y = p->field.i * p->charge;
|
|
||||||
VECTOR_DIV(new_acc, force, p->mass);
|
|
||||||
if (Local[my_id].Time_Step != 0) {
|
|
||||||
VECTOR_SUB(delta_acc, new_acc, (p->acc));
|
|
||||||
VECTOR_MUL(delta_vel, delta_acc, ((real) Timestep_Dur) / (real) 2.0);
|
|
||||||
VECTOR_ADD((p->vel), (p->vel), delta_vel);
|
|
||||||
}
|
|
||||||
p->acc.x = new_acc.x;
|
|
||||||
p->acc.y = new_acc.y;
|
|
||||||
VECTOR_MUL(delta_vel, (p->acc), ((real) Timestep_Dur) / (real) 2.0);
|
|
||||||
VECTOR_ADD(avg_vel, (p->vel), delta_vel);
|
|
||||||
VECTOR_MUL(delta_pos, avg_vel, (real) Timestep_Dur);
|
|
||||||
VECTOR_ADD((p->vel), avg_vel, delta_vel);
|
|
||||||
VECTOR_ADD((p->pos), (p->pos), delta_pos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
InitExp (box *b)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
|
|
||||||
for (i = 0; i < Expansion_Terms; i++) {
|
|
||||||
b->mp_expansion[i].r = 0.0;
|
|
||||||
b->mp_expansion[i].i = 0.0;
|
|
||||||
b->local_expansion[i].r = 0.0;
|
|
||||||
b->local_expansion[i].i = 0.0;
|
|
||||||
b->x_expansion[i].r = 0.0;
|
|
||||||
b->x_expansion[i].i = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* ComputeMPExp (long my_id, box *b)
|
|
||||||
*
|
|
||||||
* Args : a box, b.
|
|
||||||
*
|
|
||||||
* Returns : nothing.
|
|
||||||
*
|
|
||||||
* Side Effects : Computes and sets the multipole expansion array.
|
|
||||||
*
|
|
||||||
* Comments : The first terms (a0) in the expansion is simply the sum of the
|
|
||||||
* charges in the box. This procedure first computes the distances between
|
|
||||||
* the particles in the box and the boxes center. At the same time, a0 is
|
|
||||||
* computed. Then the remaining terms are calculated by theorem 2.1.1 in
|
|
||||||
* Greengard's thesis.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
ComputeMPExp (box *b)
|
|
||||||
{
|
|
||||||
particle *p;
|
|
||||||
complex charge;
|
|
||||||
complex box_pos;
|
|
||||||
complex particle_pos;
|
|
||||||
complex z0;
|
|
||||||
complex z0_pow_n;
|
|
||||||
complex temp;
|
|
||||||
complex result_exp[MAX_EXPANSION_TERMS];
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
box_pos.r = b->x_center;
|
|
||||||
box_pos.i = b->y_center;
|
|
||||||
for (i = 0; i < Expansion_Terms; i++) {
|
|
||||||
result_exp[i].r = (real) 0.0;
|
|
||||||
result_exp[i].i = (real) 0.0;
|
|
||||||
}
|
|
||||||
for (i = 0; i < b->num_particles; i++) {
|
|
||||||
p = b->particles[i];
|
|
||||||
particle_pos.r = p->pos.x;
|
|
||||||
particle_pos.i = p->pos.y;
|
|
||||||
charge.r = p->charge;
|
|
||||||
charge.i = (real) 0.0;
|
|
||||||
COMPLEX_SUB(z0, particle_pos, box_pos);
|
|
||||||
z0_pow_n.r = One.r;
|
|
||||||
z0_pow_n.i = One.i;
|
|
||||||
for (j = 1; j < Expansion_Terms; j++) {
|
|
||||||
COMPLEX_MUL(temp, z0_pow_n, charge);
|
|
||||||
COMPLEX_ADD(result_exp[j], result_exp[j], temp);
|
|
||||||
COMPLEX_MUL(z0_pow_n, z0_pow_n, z0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ALOCK(G_Memory->lock_array, b->exp_lock_index);
|
|
||||||
for (i = 0; i < Expansion_Terms; i++) {
|
|
||||||
b->mp_expansion[i].r = result_exp[i].r;
|
|
||||||
b->mp_expansion[i].i = result_exp[i].i;
|
|
||||||
}
|
|
||||||
AULOCK(G_Memory->lock_array, b->exp_lock_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
ShiftMPExp (box *cb, box *pb)
|
|
||||||
{
|
|
||||||
complex z0;
|
|
||||||
complex z0_inv;
|
|
||||||
complex z0_pow_n;
|
|
||||||
complex z0_pow_minus_n;
|
|
||||||
complex temp_exp[MAX_EXPANSION_TERMS];
|
|
||||||
complex result_exp[MAX_EXPANSION_TERMS];
|
|
||||||
complex child_pos;
|
|
||||||
complex parent_pos;
|
|
||||||
complex temp;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
child_pos.r = cb->x_center;
|
|
||||||
child_pos.i = cb->y_center;
|
|
||||||
parent_pos.r = pb->x_center;
|
|
||||||
parent_pos.i = pb->y_center;
|
|
||||||
COMPLEX_SUB(z0, child_pos, parent_pos);
|
|
||||||
COMPLEX_DIV(z0_inv, One, z0);
|
|
||||||
z0_pow_n.r = One.r;
|
|
||||||
z0_pow_n.i = One.i;
|
|
||||||
z0_pow_minus_n.r = One.r;
|
|
||||||
z0_pow_minus_n.i = One.i;
|
|
||||||
result_exp[0].r = cb->mp_expansion[0].r;
|
|
||||||
result_exp[0].i = cb->mp_expansion[0].i;
|
|
||||||
for (i = 1; i < Expansion_Terms; i++) {
|
|
||||||
result_exp[i].r = (real) 0.0;
|
|
||||||
result_exp[i].i = (real) 0.0;
|
|
||||||
COMPLEX_MUL(z0_pow_minus_n, z0_pow_minus_n, z0_inv);
|
|
||||||
COMPLEX_MUL(temp_exp[i], z0_pow_minus_n, cb->mp_expansion[i]);
|
|
||||||
for (j = 1; j <= i; j++) {
|
|
||||||
temp.r = C[i - 1][j - 1];
|
|
||||||
temp.i = (real) 0.0;
|
|
||||||
COMPLEX_MUL(temp, temp, temp_exp[j]);
|
|
||||||
COMPLEX_ADD(result_exp[i], result_exp[i], temp);
|
|
||||||
}
|
|
||||||
temp.r = Inv[i];
|
|
||||||
temp.i = (real) 0.0;
|
|
||||||
COMPLEX_MUL(temp, temp, cb->mp_expansion[0]);
|
|
||||||
COMPLEX_SUB(temp, result_exp[i], temp);
|
|
||||||
COMPLEX_MUL(z0_pow_n, z0_pow_n, z0);
|
|
||||||
COMPLEX_MUL(result_exp[i], temp, z0_pow_n);
|
|
||||||
}
|
|
||||||
ALOCK(G_Memory->lock_array, pb->exp_lock_index);
|
|
||||||
for (i = 0; i < Expansion_Terms; i++) {
|
|
||||||
COMPLEX_ADD((pb->mp_expansion[i]), (pb->mp_expansion[i]), result_exp[i]);
|
|
||||||
}
|
|
||||||
AULOCK(G_Memory->lock_array, pb->exp_lock_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
UListInteraction (long my_id, box *source_box, box *dest_box)
|
|
||||||
{
|
|
||||||
complex result;
|
|
||||||
complex temp_vector;
|
|
||||||
complex temp_charge;
|
|
||||||
complex temp_result;
|
|
||||||
real denom;
|
|
||||||
real x_sep;
|
|
||||||
real y_sep;
|
|
||||||
real dest_x;
|
|
||||||
real dest_y;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
for (i = 0; i < dest_box->num_particles; i++) {
|
|
||||||
result.r = (real) 0.0;
|
|
||||||
result.i = (real) 0.0;
|
|
||||||
dest_x = dest_box->particles[i]->pos.x;
|
|
||||||
dest_y = dest_box->particles[i]->pos.y;
|
|
||||||
for (j = 0; j < source_box->num_particles; j++) {
|
|
||||||
x_sep = source_box->particles[j]->pos.x - dest_x;
|
|
||||||
y_sep = source_box->particles[j]->pos.y - dest_y;
|
|
||||||
denom = ((real) 1.0) / ((x_sep * x_sep) + (y_sep * y_sep));
|
|
||||||
temp_vector.r = x_sep * denom;
|
|
||||||
temp_vector.i = y_sep * denom;
|
|
||||||
temp_charge.r = source_box->particles[j]->charge;
|
|
||||||
temp_charge.i = (real) 0.0;
|
|
||||||
COMPLEX_MUL(temp_result, temp_vector, temp_charge);
|
|
||||||
COMPLEX_SUB(result, result, temp_result);
|
|
||||||
}
|
|
||||||
result.i = -result.i;
|
|
||||||
COMPLEX_ADD((dest_box->particles[i]->field),
|
|
||||||
(dest_box->particles[i]->field), result);
|
|
||||||
}
|
|
||||||
|
|
||||||
dest_box->cost += U_LIST_COST(source_box->num_particles,
|
|
||||||
dest_box->num_particles);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
VListInteraction (long my_id, box *source_box, box *dest_box)
|
|
||||||
{
|
|
||||||
complex z0;
|
|
||||||
complex z0_inv;
|
|
||||||
complex z0_pow_minus_n[MAX_EXPANSION_TERMS];
|
|
||||||
complex temp_exp[MAX_EXPANSION_TERMS];
|
|
||||||
complex result_exp;
|
|
||||||
complex source_pos;
|
|
||||||
complex dest_pos;
|
|
||||||
complex temp;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
if (source_box->type == CHILDLESS) {
|
|
||||||
while (source_box->interaction_synch != 1) {
|
|
||||||
/* wait */;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
while (source_box->interaction_synch != source_box->num_children) {
|
|
||||||
/* wait */;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
source_pos.r = source_box->x_center;
|
|
||||||
source_pos.i = source_box->y_center;
|
|
||||||
dest_pos.r = dest_box->x_center;
|
|
||||||
dest_pos.i = dest_box->y_center;
|
|
||||||
COMPLEX_SUB(z0, source_pos, dest_pos);
|
|
||||||
COMPLEX_DIV(z0_inv, One, z0);
|
|
||||||
z0_pow_minus_n[0].r = One.r;
|
|
||||||
z0_pow_minus_n[0].i = One.i;
|
|
||||||
temp_exp[0].r = source_box->mp_expansion[0].r;
|
|
||||||
temp_exp[0].i = source_box->mp_expansion[0].i;
|
|
||||||
for (i = 1; i < Expansion_Terms; i++) {
|
|
||||||
COMPLEX_MUL(z0_pow_minus_n[i], z0_pow_minus_n[i - 1], z0_inv);
|
|
||||||
COMPLEX_MUL(temp_exp[i], z0_pow_minus_n[i], source_box->mp_expansion[i]);
|
|
||||||
}
|
|
||||||
for (i = 0; i < Expansion_Terms; i++) {
|
|
||||||
result_exp.r = (real) 0.0;
|
|
||||||
result_exp.i = (real) 0.0;
|
|
||||||
for (j = 1; j < Expansion_Terms; j++) {
|
|
||||||
temp.r = C[i + j - 1][j - 1];
|
|
||||||
temp.i = (real) 0.0;
|
|
||||||
COMPLEX_MUL(temp, temp, temp_exp[j]);
|
|
||||||
if ((j & 0x1) == 0x0) {
|
|
||||||
COMPLEX_ADD(result_exp, result_exp, temp);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
COMPLEX_SUB(result_exp, result_exp, temp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
COMPLEX_MUL(result_exp, result_exp, z0_pow_minus_n[i]);
|
|
||||||
if (i == 0) {
|
|
||||||
temp.r = log(COMPLEX_ABS(z0));
|
|
||||||
temp.i = (real) 0.0;
|
|
||||||
COMPLEX_MUL(temp, temp, source_box->mp_expansion[0]);
|
|
||||||
COMPLEX_ADD(result_exp, result_exp, temp);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
temp.r = Inv[i];
|
|
||||||
temp.i = (real) 0.0;
|
|
||||||
COMPLEX_MUL(temp, temp, z0_pow_minus_n[i]);
|
|
||||||
COMPLEX_MUL(temp, temp, source_box->mp_expansion[0]);
|
|
||||||
COMPLEX_SUB(result_exp, result_exp, temp);
|
|
||||||
}
|
|
||||||
COMPLEX_ADD((dest_box->local_expansion[i]),
|
|
||||||
(dest_box->local_expansion[i]), result_exp);
|
|
||||||
}
|
|
||||||
dest_box->cost += V_LIST_COST(Expansion_Terms);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
WAndXListInteractions (long my_id, box *b1, box *b2)
|
|
||||||
{
|
|
||||||
WListInteraction(b1, b2);
|
|
||||||
XListInteraction(b2, b1);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
WListInteraction (box *source_box, box *dest_box)
|
|
||||||
{
|
|
||||||
complex z0;
|
|
||||||
complex z0_inv;
|
|
||||||
complex result;
|
|
||||||
complex source_pos;
|
|
||||||
complex particle_pos;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
if (source_box->type == CHILDLESS) {
|
|
||||||
while (source_box->interaction_synch != 1) {
|
|
||||||
/* wait */;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
while (source_box->interaction_synch != source_box->num_children) {
|
|
||||||
/* wait */;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
source_pos.r = source_box->x_center;
|
|
||||||
source_pos.i = source_box->y_center;
|
|
||||||
for (i = 0; i < dest_box->num_particles; i++) {
|
|
||||||
result.r = (real) 0.0;
|
|
||||||
result.i = (real) 0.0;
|
|
||||||
particle_pos.r = dest_box->particles[i]->pos.x;
|
|
||||||
particle_pos.i = dest_box->particles[i]->pos.y;
|
|
||||||
COMPLEX_SUB(z0, particle_pos, source_pos);
|
|
||||||
COMPLEX_DIV(z0_inv, One, z0);
|
|
||||||
for (j = Expansion_Terms - 1; j > 0; j--) {
|
|
||||||
COMPLEX_ADD(result, result, (source_box->mp_expansion[j]));
|
|
||||||
COMPLEX_MUL(result, result, z0_inv);
|
|
||||||
}
|
|
||||||
COMPLEX_ADD((dest_box->particles[i]->field),
|
|
||||||
(dest_box->particles[i]->field), result);
|
|
||||||
}
|
|
||||||
|
|
||||||
dest_box->cost += W_LIST_COST(dest_box->num_particles, Expansion_Terms);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
XListInteraction (box *source_box, box *dest_box)
|
|
||||||
{
|
|
||||||
complex z0;
|
|
||||||
complex z0_inv;
|
|
||||||
complex z0_pow_minus_n;
|
|
||||||
complex result_exp[MAX_EXPANSION_TERMS];
|
|
||||||
complex source_pos;
|
|
||||||
complex dest_pos;
|
|
||||||
complex charge;
|
|
||||||
complex temp;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
dest_pos.r = dest_box->x_center;
|
|
||||||
dest_pos.i = dest_box->y_center;
|
|
||||||
for (i = 0; i < Expansion_Terms; i++) {
|
|
||||||
result_exp[i].r = (real) 0.0;
|
|
||||||
result_exp[i].i = (real) 0.0;
|
|
||||||
}
|
|
||||||
for (i = 0; i < source_box->num_particles; i++) {
|
|
||||||
source_pos.r = source_box->particles[i]->pos.x;
|
|
||||||
source_pos.i = source_box->particles[i]->pos.y;
|
|
||||||
charge.r = source_box->particles[i]->charge;
|
|
||||||
charge.i = (real) 0.0;
|
|
||||||
COMPLEX_SUB(z0, source_pos, dest_pos);
|
|
||||||
COMPLEX_DIV(z0_inv, One, z0);
|
|
||||||
z0_pow_minus_n.r = z0_inv.r;
|
|
||||||
z0_pow_minus_n.i = z0_inv.i;
|
|
||||||
for (j = 1; j < Expansion_Terms; j++) {
|
|
||||||
COMPLEX_MUL(z0_pow_minus_n, z0_pow_minus_n, z0_inv);
|
|
||||||
COMPLEX_MUL(temp, charge, z0_pow_minus_n);
|
|
||||||
COMPLEX_ADD(result_exp[j], result_exp[j], temp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ALOCK(G_Memory->lock_array, dest_box->exp_lock_index);
|
|
||||||
for (i = 0; i < Expansion_Terms; i++) {
|
|
||||||
COMPLEX_SUB((dest_box->x_expansion[i]),
|
|
||||||
(dest_box->x_expansion[i]), result_exp[i]);
|
|
||||||
}
|
|
||||||
AULOCK(G_Memory->lock_array, dest_box->exp_lock_index);
|
|
||||||
source_box->cost += X_LIST_COST(source_box->num_particles, Expansion_Terms);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
ComputeSelfInteraction (box *b)
|
|
||||||
{
|
|
||||||
complex results[MAX_PARTICLES_PER_BOX];
|
|
||||||
complex temp_vector;
|
|
||||||
complex temp_charge;
|
|
||||||
complex temp_result;
|
|
||||||
real denom;
|
|
||||||
real x_sep;
|
|
||||||
real y_sep;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
for (i = 0; i < b->num_particles; i++) {
|
|
||||||
results[i].r = (real) 0.0;
|
|
||||||
results[i].i = (real) 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < b->num_particles; i++) {
|
|
||||||
for (j = i + 1; j < b->num_particles; j++) {
|
|
||||||
x_sep = b->particles[i]->pos.x - b->particles[j]->pos.x;
|
|
||||||
y_sep = b->particles[i]->pos.y - b->particles[j]->pos.y;
|
|
||||||
|
|
||||||
if ((fabs(x_sep) < Softening_Param)
|
|
||||||
&& (fabs(y_sep) < Softening_Param)) {
|
|
||||||
if (x_sep >= 0.0)
|
|
||||||
x_sep = Softening_Param;
|
|
||||||
else
|
|
||||||
x_sep = -Softening_Param;
|
|
||||||
if (y_sep >= 0.0)
|
|
||||||
y_sep = Softening_Param;
|
|
||||||
else
|
|
||||||
y_sep = -Softening_Param;
|
|
||||||
}
|
|
||||||
denom = ((real) 1.0) / ((x_sep * x_sep) + (y_sep * y_sep));
|
|
||||||
temp_vector.r = x_sep * denom;
|
|
||||||
temp_vector.i = y_sep * denom;
|
|
||||||
|
|
||||||
temp_charge.r = b->particles[j]->charge;
|
|
||||||
temp_charge.i = (real) 0.0;
|
|
||||||
COMPLEX_MUL(temp_result, temp_vector, temp_charge);
|
|
||||||
COMPLEX_ADD(results[i], results[i], temp_result);
|
|
||||||
|
|
||||||
temp_charge.r = b->particles[i]->charge;
|
|
||||||
temp_charge.i = (real) 0.0;
|
|
||||||
COMPLEX_MUL(temp_result, temp_vector, temp_charge);
|
|
||||||
COMPLEX_SUB(results[j], results[j], temp_result);
|
|
||||||
}
|
|
||||||
results[i].i = -results[i].i;
|
|
||||||
COMPLEX_ADD((b->particles[i]->field),
|
|
||||||
(b->particles[i]->field), results[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
b->cost += SELF_COST(b->num_particles);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
ShiftLocalExp (box *pb, box *cb)
|
|
||||||
{
|
|
||||||
complex z0;
|
|
||||||
complex z0_inv;
|
|
||||||
complex z0_pow_n;
|
|
||||||
complex z0_pow_minus_n;
|
|
||||||
complex temp_exp[MAX_EXPANSION_TERMS];
|
|
||||||
complex result_exp[MAX_EXPANSION_TERMS];
|
|
||||||
complex child_pos;
|
|
||||||
complex parent_pos;
|
|
||||||
complex temp;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
child_pos.r = cb->x_center;
|
|
||||||
child_pos.i = cb->y_center;
|
|
||||||
parent_pos.r = pb->x_center;
|
|
||||||
parent_pos.i = pb->y_center;
|
|
||||||
COMPLEX_SUB(z0, child_pos, parent_pos);
|
|
||||||
COMPLEX_DIV(z0_inv, One, z0);
|
|
||||||
z0_pow_n.r = One.r;
|
|
||||||
z0_pow_n.i = One.i;
|
|
||||||
z0_pow_minus_n.r = One.r;
|
|
||||||
z0_pow_minus_n.i = One.i;
|
|
||||||
for (i = 0; i < Expansion_Terms; i++) {
|
|
||||||
COMPLEX_ADD(pb->local_expansion[i], pb->local_expansion[i],
|
|
||||||
pb->x_expansion[i]);
|
|
||||||
COMPLEX_MUL(temp_exp[i], z0_pow_n, pb->local_expansion[i]);
|
|
||||||
COMPLEX_MUL(z0_pow_n, z0_pow_n, z0);
|
|
||||||
}
|
|
||||||
for (i = 0; i < Expansion_Terms; i++) {
|
|
||||||
result_exp[i].r = (real) 0.0;
|
|
||||||
result_exp[i].i = (real) 0.0;
|
|
||||||
for (j = i; j < Expansion_Terms ; j++) {
|
|
||||||
temp.r = C[j][i];
|
|
||||||
temp.i = (real) 0.0;
|
|
||||||
COMPLEX_MUL(temp, temp, temp_exp[j]);
|
|
||||||
COMPLEX_ADD(result_exp[i], result_exp[i], temp);
|
|
||||||
}
|
|
||||||
COMPLEX_MUL(result_exp[i], temp, z0_pow_minus_n);
|
|
||||||
COMPLEX_MUL(z0_pow_minus_n, z0_pow_minus_n, z0_inv);
|
|
||||||
}
|
|
||||||
ALOCK(G_Memory->lock_array, cb->exp_lock_index);
|
|
||||||
for (i = 0; i < Expansion_Terms; i++) {
|
|
||||||
COMPLEX_ADD((cb->local_expansion[i]), (cb->local_expansion[i]),
|
|
||||||
result_exp[i]);
|
|
||||||
}
|
|
||||||
AULOCK(G_Memory->lock_array, cb->exp_lock_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
EvaluateLocalExp (box *b)
|
|
||||||
{
|
|
||||||
complex z0;
|
|
||||||
complex result;
|
|
||||||
complex source_pos;
|
|
||||||
complex particle_pos;
|
|
||||||
complex temp;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
source_pos.r = b->x_center;
|
|
||||||
source_pos.i = b->y_center;
|
|
||||||
for (i = 0; i < b->num_particles; i++) {
|
|
||||||
result.r = (real) 0.0;
|
|
||||||
result.i = (real) 0.0;
|
|
||||||
particle_pos.r = b->particles[i]->pos.x;
|
|
||||||
particle_pos.i = b->particles[i]->pos.y;
|
|
||||||
COMPLEX_SUB(z0, particle_pos, source_pos);
|
|
||||||
for (j = Expansion_Terms - 1; j > 0; j--) {
|
|
||||||
temp.r = (real) j;
|
|
||||||
temp.i = (real) 0.0;
|
|
||||||
COMPLEX_MUL(result, result, z0);
|
|
||||||
COMPLEX_MUL(temp, temp, (b->local_expansion[j]));
|
|
||||||
COMPLEX_ADD(result, result, temp);
|
|
||||||
}
|
|
||||||
COMPLEX_ADD((b->particles[i]->field), (b->particles[i]->field), result);
|
|
||||||
b->particles[i]->field.r = -(b->particles[i]->field.r);
|
|
||||||
b->particles[i]->field.r = RoundReal(b->particles[i]->field.r);
|
|
||||||
b->particles[i]->field.i = RoundReal(b->particles[i]->field.i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -1,30 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#ifndef _Expansions_H
|
|
||||||
#define _Expansions_H 1
|
|
||||||
|
|
||||||
#include "box.h"
|
|
||||||
|
|
||||||
extern void InitExpTables(void);
|
|
||||||
extern void PrintExpTables(void);
|
|
||||||
extern void UpwardPass(long my_id, box *b);
|
|
||||||
extern void ComputeInteractions(long my_id, box *b);
|
|
||||||
extern void DownwardPass(long my_id, box *b);
|
|
||||||
extern void ComputeParticlePositions(long my_id, box *b);
|
|
||||||
|
|
||||||
#endif /* _Expansions_H */
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -1,60 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#include <float.h>
|
|
||||||
#include "defs.h"
|
|
||||||
#include "memory.h"
|
|
||||||
|
|
||||||
MAIN_ENV
|
|
||||||
|
|
||||||
g_mem *G_Memory;
|
|
||||||
local_memory Local[MAX_PROCS];
|
|
||||||
|
|
||||||
/*
|
|
||||||
* InitGlobalMemory ()
|
|
||||||
*
|
|
||||||
* Args : none.
|
|
||||||
*
|
|
||||||
* Returns : nothing.
|
|
||||||
*
|
|
||||||
* Side Effects : Allocates all the global storage for G_Memory.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
InitGlobalMemory ()
|
|
||||||
{
|
|
||||||
G_Memory = (g_mem *) G_MALLOC(sizeof(g_mem));
|
|
||||||
G_Memory->i_array = (long *) G_MALLOC(Number_Of_Processors * sizeof(long));
|
|
||||||
G_Memory->d_array = (double *) G_MALLOC(Number_Of_Processors * sizeof(double));
|
|
||||||
if (G_Memory == NULL) {
|
|
||||||
printf("Ran out of global memory in InitGlobalMemory\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
G_Memory->count = 0;
|
|
||||||
G_Memory->id = 0;
|
|
||||||
LOCKINIT(G_Memory->io_lock);
|
|
||||||
LOCKINIT(G_Memory->mal_lock);
|
|
||||||
LOCKINIT(G_Memory->single_lock);
|
|
||||||
LOCKINIT(G_Memory->count_lock);
|
|
||||||
ALOCKINIT(G_Memory->lock_array, MAX_LOCKS);
|
|
||||||
BARINIT(G_Memory->synch, Number_Of_Processors);
|
|
||||||
G_Memory->max_x = -MAX_REAL;
|
|
||||||
G_Memory->min_x = MAX_REAL;
|
|
||||||
G_Memory->max_y = -MAX_REAL;
|
|
||||||
G_Memory->min_y = MAX_REAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
|
@ -1,87 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#ifndef _Memory_H
|
|
||||||
#define _Memory_H 1
|
|
||||||
|
|
||||||
#include "defs.h"
|
|
||||||
#include "particle.h"
|
|
||||||
#include "box.h"
|
|
||||||
|
|
||||||
#define MAX_LOCKS 2048
|
|
||||||
|
|
||||||
EXTERN_ENV
|
|
||||||
|
|
||||||
typedef struct _G_Mem g_mem;
|
|
||||||
|
|
||||||
/* Shared global state; a single instance is reached through G_Memory. */
struct _G_Mem
|
|
||||||
{
|
|
||||||
/* Taken around the printf/fflush error reports in CheckBox. */
LOCKDEC(io_lock)
|
|
||||||
/* Taken around the G_MALLOC call in CreateParticleList. */
LOCKDEC(mal_lock)
|
|
||||||
LOCKDEC(single_lock)
|
|
||||||
LOCKDEC(count_lock)
|
|
||||||
/* Set to 0 by InitGlobalMemory; presumably guarded by count_lock -- TODO confirm. */
long count;
|
|
||||||
/* Array of MAX_LOCKS locks. */
ALOCKDEC(lock_array, MAX_LOCKS)
|
|
||||||
/* Barrier initialized for Number_Of_Processors participants. */
BARDEC(synch)
|
|
||||||
/* Allocated with Number_Of_Processors elements by InitGlobalMemory. */
volatile long *i_array;
|
|
||||||
/* Allocated with Number_Of_Processors elements by InitGlobalMemory. */
volatile double *d_array;
|
|
||||||
real f_array[MAX_PROCS][NUM_DIM_POW_2];
|
|
||||||
/* Extents; InitGlobalMemory seeds max_* with -MAX_REAL and min_* with MAX_REAL. */
real max_x;
|
|
||||||
real min_x;
|
|
||||||
real max_y;
|
|
||||||
real min_y;
|
|
||||||
/* Set to 0 by InitGlobalMemory. */
long id;
|
|
||||||
};
|
|
||||||
extern g_mem *G_Memory;
|
|
||||||
|
|
||||||
typedef struct _Local_Memory local_memory;
|
|
||||||
/* Per-processor state; the Local[] array holds MAX_PROCS of these, indexed
   by processor id. */
struct _Local_Memory {
|
|
||||||
/* PAD_SIZE longs of leading padding -- presumably to keep neighbouring
   Local[] entries apart in memory; TODO confirm. */
long pad_begin[PAD_SIZE];
|
|
||||||
|
|
||||||
box *B_Heap;
|
|
||||||
long Index_B_Heap;
|
|
||||||
long Max_B_Heap;
|
|
||||||
|
|
||||||
/* Array of particle pointers allocated by CreateParticleList and filled
   by InitParticleList. */
particle **Particles;
|
|
||||||
/* Number of entries of Particles currently in use. */
long Num_Particles;
|
|
||||||
/* Allocated length of Particles. */
long Max_Particles;
|
|
||||||
|
|
||||||
/* Head of the doubly linked list of CHILDLESS boxes in this partition. */
box *Childless_Partition;
|
|
||||||
/* One list head per level for the non-CHILDLESS boxes. */
box *Parent_Partition[MAX_LEVEL];
|
|
||||||
/* Highest level recorded in Parent_Partition; -1 after InitPartition. */
long Max_Parent_Level;
|
|
||||||
|
|
||||||
box *Local_Grid;
|
|
||||||
real Local_X_Max;
|
|
||||||
real Local_X_Min;
|
|
||||||
real Local_Y_Max;
|
|
||||||
real Local_Y_Min;
|
|
||||||
|
|
||||||
long Total_Work;
|
|
||||||
long Min_Work;
|
|
||||||
long Max_Work;
|
|
||||||
|
|
||||||
long Time_Step;
|
|
||||||
double Time;
|
|
||||||
unsigned long init_done_times;
|
|
||||||
time_info Timing[MAX_TIME_STEPS];
|
|
||||||
|
|
||||||
/* PAD_SIZE longs of trailing padding, mirroring pad_begin. */
long pad_end[PAD_SIZE];
|
|
||||||
};
|
|
||||||
extern local_memory Local[MAX_PROCS];
|
|
||||||
|
|
||||||
extern void InitGlobalMemory(void);
|
|
||||||
|
|
||||||
#endif /* _Memory_H */
|
|
|
@ -1,341 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include "defs.h"
|
|
||||||
#include "memory.h"
|
|
||||||
#include "particle.h"
|
|
||||||
|
|
||||||
#define ONE_EV ((real) 1.6e-19)
|
|
||||||
#define MAX_FRAC 0.999
|
|
||||||
#define RANDOM_SIZE 256
|
|
||||||
|
|
||||||
#if !defined(M_PI)
|
|
||||||
#define M_PI 3.14159265358979323846
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* How many particles can fit on one line */
|
|
||||||
#define PARTICLES_PER_LINE 8
|
|
||||||
|
|
||||||
long Total_Particles;
|
|
||||||
|
|
||||||
/* Used to keep track of all the particles. Array is in order of increasing id. */
|
|
||||||
static particle **Particle_List;
|
|
||||||
|
|
||||||
particle *InitParticle(real charge, real mass);
|
|
||||||
void PickShell(vector *v, real radius);
|
|
||||||
real XRand(real low, real high);
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
CreateDistribution (cluster_type cluster, model_type model)
|
|
||||||
{
|
|
||||||
particle *particle_array;
|
|
||||||
long global_num_particles;
|
|
||||||
particle *new_particle;
|
|
||||||
// char particle_state[RANDOM_SIZE];
|
|
||||||
real charge;
|
|
||||||
real r_scale;
|
|
||||||
real v_scale;
|
|
||||||
vector r_sum;
|
|
||||||
vector v_sum;
|
|
||||||
long end_limit = 0;
|
|
||||||
long i;
|
|
||||||
real temp_r;
|
|
||||||
real radius = 0.0;
|
|
||||||
real x_vel;
|
|
||||||
real y_vel;
|
|
||||||
real vel;
|
|
||||||
real offset = 0.0;
|
|
||||||
particle *twin_particle;
|
|
||||||
|
|
||||||
particle_array = (particle *) G_MALLOC(Total_Particles * sizeof(particle));
|
|
||||||
|
|
||||||
Particle_List = (particle **) G_MALLOC(Total_Particles * sizeof(particle *));
|
|
||||||
for (i = 0; i < Total_Particles; i++)
|
|
||||||
Particle_List[i] = &particle_array[i];
|
|
||||||
|
|
||||||
r_scale = 3 * M_PI / 16;
|
|
||||||
v_scale = (real) sqrt(1.0 / (double) r_scale);
|
|
||||||
r_sum.x = (real) 0.0;
|
|
||||||
r_sum.y = (real) 0.0;
|
|
||||||
v_sum.x = (real) 0.0;
|
|
||||||
v_sum.y = (real) 0.0;
|
|
||||||
// initstate(0, particle_state, RANDOM_SIZE);
|
|
||||||
|
|
||||||
switch (cluster) {
|
|
||||||
case ONE_CLUSTER:
|
|
||||||
end_limit = Total_Particles;
|
|
||||||
switch (model) {
|
|
||||||
case UNIFORM:
|
|
||||||
printf("Creating a one cluster, uniform distribution for %ld ", Total_Particles);
|
|
||||||
printf("particles\n");
|
|
||||||
break;
|
|
||||||
case PLUMMER:
|
|
||||||
printf("Creating a one cluster, non uniform distribution for %ld ", Total_Particles);
|
|
||||||
printf("particles\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case TWO_CLUSTER:
|
|
||||||
end_limit = (Total_Particles / 2) + (Total_Particles & 0x1);
|
|
||||||
switch (model) {
|
|
||||||
case UNIFORM:
|
|
||||||
printf("Creating a two cluster, uniform distribution for %ld ", Total_Particles);
|
|
||||||
printf("particles\n");
|
|
||||||
break;
|
|
||||||
case PLUMMER:
|
|
||||||
printf("Creating a two cluster, non uniform distribution for %ld ", Total_Particles);
|
|
||||||
printf("particles\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// setstate(particle_state);
|
|
||||||
global_num_particles = 0;
|
|
||||||
charge = 1.0 / Total_Particles;
|
|
||||||
charge /= Total_Particles;
|
|
||||||
for (i = 0; i < end_limit; i++) {
|
|
||||||
new_particle = InitParticle(charge, charge);
|
|
||||||
switch (model) {
|
|
||||||
case UNIFORM:
|
|
||||||
do {
|
|
||||||
new_particle->pos.x = XRand(-1.0, 1.0);
|
|
||||||
new_particle->pos.y = XRand(-1.0, 1.0);
|
|
||||||
temp_r = DOT_PRODUCT((new_particle->pos), (new_particle->pos));
|
|
||||||
}
|
|
||||||
while (temp_r > (real) 1.0);
|
|
||||||
radius = sqrt(temp_r);
|
|
||||||
break;
|
|
||||||
case PLUMMER:
|
|
||||||
do
|
|
||||||
radius = (real) 1.0 / (real) sqrt(pow(XRand(0.0, MAX_FRAC),
|
|
||||||
-2.0/3.0) - 1);
|
|
||||||
while (radius > 9.0);
|
|
||||||
PickShell(&(new_particle->pos), r_scale * radius);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
VECTOR_ADD(r_sum, r_sum, (new_particle->pos));
|
|
||||||
|
|
||||||
do {
|
|
||||||
x_vel = XRand(0.0, 1.0);
|
|
||||||
y_vel = XRand(0.0, 0.1);
|
|
||||||
}
|
|
||||||
while (y_vel > x_vel * x_vel * (real) pow(1.0 - (x_vel * x_vel), 3.5));
|
|
||||||
vel = (real) sqrt(2.0) * x_vel / pow(1.0 + (radius * radius), 0.25);
|
|
||||||
PickShell(&(new_particle->vel), v_scale * vel);
|
|
||||||
VECTOR_ADD(v_sum, v_sum, (new_particle->vel));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cluster == TWO_CLUSTER) {
|
|
||||||
switch (model) {
|
|
||||||
case UNIFORM:
|
|
||||||
offset = 1.5;
|
|
||||||
break;
|
|
||||||
case PLUMMER:
|
|
||||||
offset = 2.0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
for (i = end_limit; i < Total_Particles; i++) {
|
|
||||||
new_particle = InitParticle(charge, charge);
|
|
||||||
twin_particle = Particle_List[i - end_limit];
|
|
||||||
new_particle->pos.x = twin_particle->pos.x + offset;
|
|
||||||
new_particle->pos.y = twin_particle->pos.y + offset;
|
|
||||||
VECTOR_ADD(r_sum, r_sum, (new_particle->pos));
|
|
||||||
new_particle->vel.x = twin_particle->vel.x;
|
|
||||||
new_particle->vel.y = twin_particle->vel.y;
|
|
||||||
VECTOR_ADD(v_sum, v_sum, (new_particle->vel));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
VECTOR_DIV(r_sum, r_sum, (real) Total_Particles);
|
|
||||||
VECTOR_DIV(v_sum, v_sum, (real) Total_Particles);
|
|
||||||
for (i = 0; i < Total_Particles; i++) {
|
|
||||||
new_particle = Particle_List[i];
|
|
||||||
VECTOR_SUB((new_particle->pos), (new_particle->pos), r_sum);
|
|
||||||
VECTOR_SUB((new_particle->vel), (new_particle->vel), v_sum);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
CreateParticleList (long my_id, long length)
|
|
||||||
{
|
|
||||||
LOCK(G_Memory->mal_lock);
|
|
||||||
Local[my_id].Particles = (particle **) G_MALLOC(length
|
|
||||||
* sizeof(particle *));
|
|
||||||
|
|
||||||
/* POSSIBLE ENHANCEMENT: Here is where one might distribute the
|
|
||||||
Particles data across physically distributed memories as desired.
|
|
||||||
|
|
||||||
One way to do this is as follows:
|
|
||||||
|
|
||||||
char *starting_address;
|
|
||||||
char *ending_address;
|
|
||||||
|
|
||||||
starting_address = (char *) Local[my_id].Particles;
|
|
||||||
ending_address = (((char *) Local[my_id].Particles)
|
|
||||||
+ (length * sizeof(particle *)) - 1);
|
|
||||||
|
|
||||||
Place all addresses x such that (starting_address <= x < ending_address)
|
|
||||||
on node my_id
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
UNLOCK(G_Memory->mal_lock);
|
|
||||||
Local[my_id].Max_Particles = length;
|
|
||||||
Local[my_id].Num_Particles = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
InitParticleList (long my_id, long num_assigned, long starting_id)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
|
|
||||||
for (i = 0; i < num_assigned; i++)
|
|
||||||
Local[my_id].Particles[i] = Particle_List[i + starting_id];
|
|
||||||
Local[my_id].Num_Particles = num_assigned;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* PrintParticle (particle *p)
|
|
||||||
*
|
|
||||||
* Args : the address of a particle, p.
|
|
||||||
*
|
|
||||||
* Returns : nothing.
|
|
||||||
*
|
|
||||||
* Side Effects : Prints to stdout the information stored for p.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
PrintParticle (particle *p)
|
|
||||||
{
|
|
||||||
if (p != NULL) {
|
|
||||||
printf("P %6ld :", p->id);
|
|
||||||
printf(" Pos = ");
|
|
||||||
PrintVector(&(p->pos));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
printf("Particle has not been initialized yet.\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
PrintAllParticles ()
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
|
|
||||||
fflush(stdout);
|
|
||||||
printf(" PARTICLE POSITIONS\n\n");
|
|
||||||
for (i = 0; i < Total_Particles; i++) {
|
|
||||||
PrintParticle(Particle_List[i]);
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
PrintParticleArrayIds (particle **p_array, long num_particles)
|
|
||||||
{
|
|
||||||
long tab_count = PARTICLES_PER_LINE;
|
|
||||||
long i = 0;
|
|
||||||
|
|
||||||
if (num_particles == 0)
|
|
||||||
printf("NONE\n");
|
|
||||||
else {
|
|
||||||
for (i = 0; i < num_particles; i++) {
|
|
||||||
if (tab_count == 0) {
|
|
||||||
tab_count = PARTICLES_PER_LINE;
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
printf("\tP%ld", p_array[i]->id);
|
|
||||||
tab_count -= 1;
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* InitParticle (long my_id, real x_pos, real y_pos, real charge)
|
|
||||||
*
|
|
||||||
* Args : the x_pos, y_pos, and charge (in eV) of the particle.
|
|
||||||
*
|
|
||||||
* Returns : the address of the newly created particle.
|
|
||||||
*
|
|
||||||
* Side Effects : Initializes field to 0, and sets the particle ID to a
|
|
||||||
* unique number. Also converts charge to coulombs from eV.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
particle *
|
|
||||||
InitParticle (real charge, real mass)
|
|
||||||
{
|
|
||||||
particle *p;
|
|
||||||
static long particle_id = 0;
|
|
||||||
|
|
||||||
p = Particle_List[particle_id];
|
|
||||||
p->id = particle_id++;
|
|
||||||
p->charge = charge;
|
|
||||||
p->mass = mass;
|
|
||||||
p->pos.x = (real) 0.0;
|
|
||||||
p->pos.y = (real) 0.0;
|
|
||||||
p->vel.x = (real) 0.0;
|
|
||||||
p->vel.y = (real) 0.0;
|
|
||||||
p->acc.x = (real) 0.0;
|
|
||||||
p->acc.y = (real) 0.0;
|
|
||||||
p->field.r = (real) 0.0;
|
|
||||||
p->field.i = (real) 0.0;
|
|
||||||
p->cost = 1;
|
|
||||||
p->box = 0.0;
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
PickShell (vector *v, real radius)
|
|
||||||
{
|
|
||||||
real temp_r;
|
|
||||||
real r_scale;
|
|
||||||
|
|
||||||
do {
|
|
||||||
v->x = XRand(-1.0, 1.0);
|
|
||||||
v->y = XRand(-1.0, 1.0);
|
|
||||||
temp_r = DOT_PRODUCT((*v), (*v));
|
|
||||||
}
|
|
||||||
while (temp_r >1.0);
|
|
||||||
r_scale = radius / (real) sqrt(temp_r);
|
|
||||||
VECTOR_MUL((*v), (*v), r_scale);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
real
|
|
||||||
XRand (real low, real high)
|
|
||||||
{
|
|
||||||
real ret_val;
|
|
||||||
|
|
||||||
ret_val = low + (high - low) * ((real) rand/*om*/() / 2147483647.0);
|
|
||||||
return ret_val;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#undef PARTICLES_PER_LINE
|
|
||||||
#undef MAX_FRAC
|
|
||||||
#undef RANDOM_SIZE
|
|
||||||
#undef ONE_EV
|
|
|
@ -1,67 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#ifndef _Particle_H
|
|
||||||
#define _Particle_H 1
|
|
||||||
|
|
||||||
#include "defs.h"
|
|
||||||
|
|
||||||
typedef struct _Particle particle;
|
|
||||||
typedef struct _Particle_Node particle_node;
|
|
||||||
|
|
||||||
typedef enum { ONE_CLUSTER, TWO_CLUSTER } cluster_type;
|
|
||||||
typedef enum { UNIFORM, PLUMMER } model_type;
|
|
||||||
|
|
||||||
/* Every particle has :
|
|
||||||
* 1. A unique ID number (id)
|
|
||||||
* 2. An x and y position (pos), a velocity (vel) and an acceleration (acc)
|
|
||||||
* 3. A charge and a mass
|
|
||||||
* 4. The field that acts on it due to every other particle (field)
* 5. A cost and a box value (InitParticle sets them to 1 and 0.0)
|
|
||||||
*
|
|
||||||
* The force is what the algorithm actually calculates.
|
|
||||||
*/
|
|
||||||
struct _Particle
|
|
||||||
{
|
|
||||||
/* Assigned from a running counter in InitParticle. */
long id;
|
|
||||||
real charge;
|
|
||||||
real mass;
|
|
||||||
vector pos;
|
|
||||||
vector acc;
|
|
||||||
vector vel;
|
|
||||||
/* Complex value with .r and .i parts. */
complex field;
|
|
||||||
/* Read by ComputeCostOfBox when it estimates the cost of a box. */
long cost;
|
|
||||||
/* NOTE(review): stored as a real; meaning not visible here -- confirm. */
real box;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/* This structure is used for linked lists of particles */
|
|
||||||
/* One node of a singly linked list of particles. */
struct _Particle_Node
|
|
||||||
{
|
|
||||||
/* The particle this node refers to. */
particle *data;
|
|
||||||
/* Following node -- presumably NULL at the end of the list; TODO confirm. */
particle_node *next;
|
|
||||||
};
|
|
||||||
|
|
||||||
extern long Total_Particles;
|
|
||||||
|
|
||||||
extern void CreateParticleList(long my_id, long length);
|
|
||||||
extern void InitParticleList(long my_id, long num_assigned, long starting_id);
|
|
||||||
extern void CreateDistribution(cluster_type cluster, model_type model);
|
|
||||||
extern void PrintParticle(particle *p);
|
|
||||||
extern void PrintAllParticles(void);
|
|
||||||
extern void PrintParticleArrayIds(particle **p_array, long num_particles);
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* _Particle_H */
|
|
|
@ -1,373 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#include <math.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include "defs.h"
|
|
||||||
#include "memory.h"
|
|
||||||
#include "particle.h"
|
|
||||||
#include "box.h"
|
|
||||||
#include "partition_grid.h"
|
|
||||||
|
|
||||||
/* Cost scaling: 1 up to 20, then 50 for every unit above 20.  The argument
   is parenthesized so that any expression can be passed safely. */
#define DIVISOR(x) (((x) <= 20) ? 1 : (((x) - 20) * 50))
|
|
||||||
|
|
||||||
typedef struct _Id_Info id_info;
|
|
||||||
/* Pair of an id and a count.
   NOTE(review): no use of this type is visible in this part of the file. */
struct _Id_Info
|
|
||||||
{
|
|
||||||
long id;
|
|
||||||
long num;
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct _Cost_Info cost_info;
|
|
||||||
/* Histogram entry used by ComputeCostOfBox. */
struct _Cost_Info
|
|
||||||
{
|
|
||||||
/* A particle cost value. */
long cost;
|
|
||||||
/* How many particles have that cost; ComputeCostOfBox also reuses this
   field of its "winner" entry to count costs tied for most frequent. */
long num;
|
|
||||||
};
|
|
||||||
|
|
||||||
long CheckBox(long my_id, box *b, long partition_level);
|
|
||||||
|
|
||||||
void
|
|
||||||
InitPartition (long my_id)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
|
|
||||||
Local[my_id].Childless_Partition = NULL;
|
|
||||||
for (i = 0; i < MAX_LEVEL; i++) {
|
|
||||||
Local[my_id].Parent_Partition[i] = NULL;
|
|
||||||
}
|
|
||||||
Local[my_id].Max_Parent_Level = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
PartitionIterate (long my_id, partition_function function,
|
|
||||||
partition_start position)
|
|
||||||
{
|
|
||||||
box *b;
|
|
||||||
long i;
|
|
||||||
|
|
||||||
if (position == CHILDREN) {
|
|
||||||
b = Local[my_id].Childless_Partition;
|
|
||||||
while (b != NULL) {
|
|
||||||
(*function)(my_id, b);
|
|
||||||
b = b->next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (position == TOP) {
|
|
||||||
for (i = 0; i <= Local[my_id].Max_Parent_Level; i++) {
|
|
||||||
b = Local[my_id].Parent_Partition[i];
|
|
||||||
while (b != NULL) {
|
|
||||||
(*function)(my_id, b);
|
|
||||||
b = b->next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
b = Local[my_id].Childless_Partition;
|
|
||||||
while (b != NULL) {
|
|
||||||
(*function)(my_id, b);
|
|
||||||
b = b->next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
b = Local[my_id].Childless_Partition;
|
|
||||||
while (b != NULL) {
|
|
||||||
(*function)(my_id, b);
|
|
||||||
b = b->next;
|
|
||||||
}
|
|
||||||
for (i = Local[my_id].Max_Parent_Level; i >= 0; i--) {
|
|
||||||
b = Local[my_id].Parent_Partition[i];
|
|
||||||
while (b != NULL) {
|
|
||||||
(*function)(my_id, b);
|
|
||||||
b = b->next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
InsertBoxInPartition (long my_id, box *b)
|
|
||||||
{
|
|
||||||
box *level_list;
|
|
||||||
|
|
||||||
if (b->type == CHILDLESS) {
|
|
||||||
b->prev = NULL;
|
|
||||||
if (Local[my_id].Childless_Partition != NULL)
|
|
||||||
Local[my_id].Childless_Partition->prev = b;
|
|
||||||
b->next = Local[my_id].Childless_Partition;
|
|
||||||
Local[my_id].Childless_Partition = b;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
level_list = Local[my_id].Parent_Partition[b->level];
|
|
||||||
b->prev = NULL;
|
|
||||||
if (level_list != NULL)
|
|
||||||
level_list->prev = b;
|
|
||||||
b->next = level_list;
|
|
||||||
Local[my_id].Parent_Partition[b->level] = b;
|
|
||||||
if (b->level > Local[my_id].Max_Parent_Level) {
|
|
||||||
Local[my_id].Max_Parent_Level = b->level;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
RemoveBoxFromPartition (long my_id, box *b)
|
|
||||||
{
|
|
||||||
if (b->type == CHILDLESS) {
|
|
||||||
if (b->prev != NULL)
|
|
||||||
b->prev->next = b->next;
|
|
||||||
else
|
|
||||||
Local[my_id].Childless_Partition = b->next;
|
|
||||||
if (b->next != NULL)
|
|
||||||
b->next->prev = b->prev;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (b->prev != NULL)
|
|
||||||
b->prev->next = b->next;
|
|
||||||
else
|
|
||||||
Local[my_id].Parent_Partition[b->level] = b->next;
|
|
||||||
if (b->next != NULL)
|
|
||||||
b->next->prev = b->prev;
|
|
||||||
if ((b->level == Local[my_id].Max_Parent_Level) &&
|
|
||||||
(Local[my_id].Parent_Partition[b->level] == NULL)) {
|
|
||||||
while (Local[my_id].Parent_Partition[Local[my_id].Max_Parent_Level]
|
|
||||||
== NULL)
|
|
||||||
Local[my_id].Max_Parent_Level -= 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
ComputeCostOfBox (box *b)
|
|
||||||
{
|
|
||||||
long different_costs;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long new_cost;
|
|
||||||
cost_info cost_list[MAX_PARTICLES_PER_BOX];
|
|
||||||
cost_info winner;
|
|
||||||
long winner_index;
|
|
||||||
long cost_index[MAX_PARTICLES_PER_BOX];
|
|
||||||
|
|
||||||
if (b->type == PARENT)
|
|
||||||
b->cost = ((b->num_v_list * V_LIST_COST(Expansion_Terms))
|
|
||||||
/ DIVISOR(Expansion_Terms)) + 1;
|
|
||||||
else {
|
|
||||||
different_costs = 0;
|
|
||||||
for (i = 0; i < b->num_particles; i++) {
|
|
||||||
new_cost = b->particles[i]->cost;
|
|
||||||
for (j = 0; j < different_costs; j++) {
|
|
||||||
if (new_cost == cost_list[j].cost)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (j == different_costs) {
|
|
||||||
cost_list[different_costs].cost = new_cost;
|
|
||||||
cost_list[different_costs].num = 1;
|
|
||||||
different_costs += 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
cost_list[j].num += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
winner.cost = cost_list[0].cost;
|
|
||||||
winner.num = 1;
|
|
||||||
winner_index = 0;
|
|
||||||
cost_index[0] = 0;
|
|
||||||
for (i = 1; i < different_costs; i++) {
|
|
||||||
if (cost_list[i].num > cost_list[winner_index].num) {
|
|
||||||
winner.cost = cost_list[i].cost;
|
|
||||||
winner.num = 1;
|
|
||||||
winner_index = i;
|
|
||||||
cost_index[0] = i;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (cost_list[i].num == cost_list[winner_index].num) {
|
|
||||||
cost_index[winner.num] = i;
|
|
||||||
winner.num += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (winner.num != 1) {
|
|
||||||
for (i = 1; i < winner.num; i++)
|
|
||||||
winner.cost += cost_list[cost_index[i]].cost;
|
|
||||||
winner.cost /= winner.num;
|
|
||||||
}
|
|
||||||
b->cost = (winner.cost * b->num_particles) / DIVISOR(Expansion_Terms);
|
|
||||||
if (b->cost == 0)
|
|
||||||
b->cost = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
CheckPartition (long my_id)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
box *b;
|
|
||||||
long NE, NoP, CB, PB;
|
|
||||||
long Q1, Q2, Q3, Q4;
|
|
||||||
long PC, CC;
|
|
||||||
real xpos, ypos;
|
|
||||||
|
|
||||||
NE = NoP = CB = PB = Q1 = Q2 = Q3 = Q4 = PC = CC = 0;
|
|
||||||
for (i = 0; i <= Local[my_id].Max_Parent_Level; i++) {
|
|
||||||
b = Local[my_id].Parent_Partition[i];
|
|
||||||
while (b != NULL) {
|
|
||||||
NE += CheckBox(my_id, b, i);
|
|
||||||
PB += 1;
|
|
||||||
PC += b->cost;
|
|
||||||
b = b->next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
b = Local[my_id].Childless_Partition;
|
|
||||||
while (b != NULL) {
|
|
||||||
NE += CheckBox(my_id, b, -1);
|
|
||||||
for (i = 0; i < b->num_particles; i++) {
|
|
||||||
xpos = b->particles[i]->pos.x;
|
|
||||||
ypos = b->particles[i]->pos.y;
|
|
||||||
if (xpos > Grid->x_center) {
|
|
||||||
if (ypos > Grid->y_center)
|
|
||||||
Q1 += 1;
|
|
||||||
else
|
|
||||||
Q4 += 1;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (ypos > Grid->y_center)
|
|
||||||
Q2 += 1;
|
|
||||||
else
|
|
||||||
Q3 += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
NoP += b->num_particles;
|
|
||||||
CB += 1;
|
|
||||||
CC += b->cost;
|
|
||||||
b = b->next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
long
|
|
||||||
CheckBox (long my_id, box *b, long partition_level)
|
|
||||||
{
|
|
||||||
long num_errors;
|
|
||||||
|
|
||||||
num_errors = 0;
|
|
||||||
if (b->type == CHILDLESS) {
|
|
||||||
if (partition_level != -1) {
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
printf("ERROR : CHILDLESS box in parent partition (B%f P%ld %ld)\n", b->id, my_id, b->proc);
|
|
||||||
fflush(stdout);
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
num_errors += 1;
|
|
||||||
}
|
|
||||||
if (b->num_children != 0) {
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
printf("ERROR : CHILDLESS box has children (B%f P%ld)\n", b->id, my_id);
|
|
||||||
fflush(stdout);
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
num_errors += 1;
|
|
||||||
}
|
|
||||||
if (b->num_particles == 0) {
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
printf("ERROR : CHILDLESS box has no particles (B%f P%ld)\n", b->id, my_id);
|
|
||||||
fflush(stdout);
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
num_errors += 1;
|
|
||||||
}
|
|
||||||
if (b->particles[b->num_particles - 1] == NULL) {
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
printf("ERROR : CHILDLESS box has fewer particles than expected ");
|
|
||||||
printf("(B%f P%ld)\n", b->id, my_id);
|
|
||||||
fflush(stdout);
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
num_errors += 1;
|
|
||||||
}
|
|
||||||
if (b->particles[b->num_particles] != NULL) {
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
printf("ERROR : CHILDLESS box has more particles than expected ");
|
|
||||||
printf("(B%f P%ld)\n", b->id, my_id);
|
|
||||||
fflush(stdout);
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
num_errors += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (partition_level == -1) {
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
printf("ERROR : PARENT box in childless partition (B%f P%ld %ld)\n",
|
|
||||||
b->id, my_id, b->proc);
|
|
||||||
fflush(stdout);
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
num_errors += 1;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (partition_level != b->level) {
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
printf("ERROR : PARENT box in wrong partition level ");
|
|
||||||
printf("(%ld vs %ld) (B%f P%ld)\n", b->level, partition_level, b->id, my_id);
|
|
||||||
fflush(stdout);
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
num_errors += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (b->num_children == 0) {
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
printf("ERROR : PARENT box has no children (B%f P%ld)\n", b->id, my_id);
|
|
||||||
fflush(stdout);
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
num_errors += 1;
|
|
||||||
}
|
|
||||||
if (b->num_particles != 0) {
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
printf("ERROR : PARENT box has particles (B%f P%ld)\n", b->id, my_id);
|
|
||||||
fflush(stdout);
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
num_errors += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (b->parent == NULL) {
|
|
||||||
if (b != Grid) {
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
if (b->type == CHILDLESS)
|
|
||||||
printf("ERROR : Extra CHILDLESS box in partition (B%f P%ld)\n", b->id, my_id);
|
|
||||||
else
|
|
||||||
printf("ERROR : Extra PARENT box in partition (B%f P%ld)\n", b->id, my_id);
|
|
||||||
fflush(stdout);
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
num_errors += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (b->parent->children[b->child_num] != b) {
|
|
||||||
LOCK(G_Memory->io_lock);
|
|
||||||
if (b->type == CHILDLESS)
|
|
||||||
printf("ERROR : Extra CHILDLESS box in partition (B%f P%ld)\n", b->id, my_id);
|
|
||||||
else
|
|
||||||
printf("ERROR : Extra PARENT box in partition (B%f P%ld)\n", b->id, my_id);
|
|
||||||
fflush(stdout);
|
|
||||||
UNLOCK(G_Memory->io_lock);
|
|
||||||
num_errors += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return num_errors;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#undef DIVISOR
|
|
|
@ -1,37 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#ifndef _Partition_H
|
|
||||||
#define _Partition_H 1
|
|
||||||
|
|
||||||
#include "defs.h"
|
|
||||||
#include "box.h"
|
|
||||||
|
|
||||||
/* Callback type: takes the calling processor's id and a box, returns
   nothing.  PartitionIterate() accepts one of these as its `function`
   argument. */
|
|
||||||
typedef void (*partition_function)(long my_id, box *b);
|
|
||||||
|
|
||||||
/* Value passed as the `position` argument of PartitionIterate().
   NOTE(review): presumably selects where in the box tree the iteration
   starts -- confirm against the implementation. */
typedef enum { TOP, BOTTOM, CHILDREN } partition_start;
|
|
||||||
/* Selector for the partitioning scheme.
   NOTE(review): ORB presumably stands for orthogonal recursive bisection;
   verify against the code that consumes this enum. */
typedef enum { ORB, COST_ZONES } partition_alg;
|
|
||||||
|
|
||||||
extern void InitPartition(long my_id);
|
|
||||||
extern void PartitionIterate(long my_id, partition_function function,
|
|
||||||
partition_start position);
|
|
||||||
extern void InsertBoxInPartition(long my_id, box *b);
|
|
||||||
extern void RemoveBoxFromPartition(long my_id, box *b);
|
|
||||||
extern void ComputeCostOfBox(box *b);
|
|
||||||
extern void CheckPartition(long my_id);
|
|
||||||
|
|
||||||
#endif /* _Partition_H */
|
|
|
@ -1,83 +0,0 @@
|
||||||
GENERAL INFORMATION:
|
|
||||||
|
|
||||||
The OCEAN program simulates large-scale ocean movements based on eddy and
|
|
||||||
boundary currents, and is an enhanced version of the SPLASH Ocean code.
|
|
||||||
A description of the functionality of this code can be found in the
|
|
||||||
original SPLASH report. The implementations contained in SPLASH-2
|
|
||||||
differ from the original SPLASH implementation in the following ways:
|
|
||||||
|
|
||||||
(1) The SPLASH-2 implementations are written in C rather than
|
|
||||||
FORTRAN.
|
|
||||||
(2) Grids are partitioned into square-like subgrids rather than
|
|
||||||
groups of columns to improve the communication to computation
|
|
||||||
ratio.
|
|
||||||
(3) The SOR solver in the SPLASH Ocean code has been replaced with a
|
|
||||||
restricted Red-Black Gauss-Seidel Multigrid solver based on that
|
|
||||||
presented in:
|
|
||||||
|
|
||||||
Brandt, A. Multi-Level Adaptive Solutions to Boundary-Value Problems.
|
|
||||||
Mathematics of Computation, 31(138):333-390, April 1977.
|
|
||||||
|
|
||||||
The solver is restricted so that each processor has at least two
|
|
||||||
grid points in each dimension in each grid subpartition.
|
|
||||||
|
|
||||||
Two implementations are provided in the SPLASH-2 distribution:
|
|
||||||
|
|
||||||
(1) Non-contiguous partition allocation
|
|
||||||
|
|
||||||
This implementation (contained in the non_contiguous_partitions
|
|
||||||
subdirectory) implements the grids to be operated on with
|
|
||||||
two-dimensional arrays. This data structure prevents partitions
|
|
||||||
from being allocated contiguously, but leads to a conceptually
|
|
||||||
simple programming implementation.
|
|
||||||
|
|
||||||
(2) Contiguous partition allocation
|
|
||||||
|
|
||||||
This implementation (contained in the contiguous_partitions
|
|
||||||
subdirectory) implements the grids to be operated on with
|
|
||||||
3-dimensional arrays. The first dimension specifies the processor
|
|
||||||
which owns the partition, and the second and third dimensions
|
|
||||||
specify the x and y offset within a partition. This data structure
|
|
||||||
allows partitions to be allocated contiguously and entirely in the
|
|
||||||
local memory of processors that "own" them, thus enhancing data
|
|
||||||
locality properties.
|
|
||||||
|
|
||||||
The contiguous partition allocation implementation is described in:
|
|
||||||
|
|
||||||
Woo, S. C., Singh, J. P., and Hennessy, J. L. The Performance Advantages
|
|
||||||
of Integrating Message Passing in Cache-Coherent Multiprocessors.
|
|
||||||
Technical Report CSL-TR-93-593, Stanford University, December 1993.
|
|
||||||
|
|
||||||
A detailed description of both versions will appear in the SPLASH-2 report.
|
|
||||||
The non-contiguous partition allocation implementation is conceptually
|
|
||||||
similar, except for the use of statically allocated 2-dimensional arrays.
|
|
||||||
|
|
||||||
These programs work under both the Unix FORK and SPROC models.
|
|
||||||
|
|
||||||
RUNNING THE PROGRAM:
|
|
||||||
|
|
||||||
To see how to run the program, please see the comment at the top of the
|
|
||||||
file main.C, or run the application with the "-h" command line option.
|
|
||||||
Five command line parameters can be specified, of which the ones which
|
|
||||||
would normally be changed are the number of grid points in each dimension,
|
|
||||||
and the number of processors. The number of grid points must be a
|
|
||||||
(power of 2) + 2 in each dimension (e.g. 130, 258, etc.). The number of
|
|
||||||
processors must be a power of 2. Timing information is printed out at
|
|
||||||
the end of the program. The first timestep is considered part of the
|
|
||||||
initialization phase of the program, and hence is not included in the
|
|
||||||
"Total time without initialization."
|
|
||||||
|
|
||||||
BASE PROBLEM SIZE:
|
|
||||||
|
|
||||||
The base problem size for a machine with up to 64 processors is a 258x258 grid.
|
|
||||||
The default values should be used for other parameters (except the number
|
|
||||||
of processors, which can be varied). In addition, sample output files
|
|
||||||
for the default parameters for each version of the code are contained in
|
|
||||||
the file correct.out in each subdirectory.
|
|
||||||
|
|
||||||
DATA DISTRIBUTION:
|
|
||||||
|
|
||||||
Our "POSSIBLE ENHANCEMENT" comments in the source code tell where one
|
|
||||||
might want to distribute data and how. Data distribution has an impact
|
|
||||||
on performance on the Stanford DASH multiprocessor.
|
|
||||||
|
|
|
@ -1,16 +0,0 @@
|
||||||
TARGET = OCEAN
|
|
||||||
OBJS = jacobcalc.o jacobcalc2.o laplacalc.o linkup.o main.o multi.o slave1.o slave2.o subblock.o
|
|
||||||
|
|
||||||
include ../../../Makefile.config
|
|
||||||
|
|
||||||
decs.h: decs.H
|
|
||||||
jacobcalc.c: decs.h
|
|
||||||
linkup.c: decs.h
|
|
||||||
slave1.c: decs.h
|
|
||||||
jacobcalc2.c: decs.h
|
|
||||||
main.c: decs.h
|
|
||||||
slave2.c: decs.h
|
|
||||||
laplacalc.c: decs.h
|
|
||||||
multi.c: decs.h
|
|
||||||
subblock.c: decs.h
|
|
||||||
|
|
|
@ -1,40 +0,0 @@
|
||||||
shmid 0x2fa8 shmvaddr 0x500000
|
|
||||||
|
|
||||||
Ocean simulation with W-cycle multigrid solver
|
|
||||||
Processors : 1
|
|
||||||
Grid size : 258 x 258
|
|
||||||
Grid resolution (meters) : 20000.00
|
|
||||||
Time between relaxations (seconds) : 28800
|
|
||||||
Error tolerance : 1e-07
|
|
||||||
|
|
||||||
MULTIGRID OUTPUTS
|
|
||||||
iter 71, level 7, residual norm 8.00274594e-08, work = 33.875
|
|
||||||
iter 31, level 7, residual norm 4.08062997e-08, work = 13.563
|
|
||||||
iter 22, level 7, residual norm 5.94548243e-08, work = 9.438
|
|
||||||
iter 12, level 7, residual norm 4.05573539e-08, work = 6.188
|
|
||||||
iter 2, level 7, residual norm 8.20209761e-08, work = 2.000
|
|
||||||
iter 5, level 7, residual norm 6.54258351e-08, work = 5.000
|
|
||||||
iter 3, level 7, residual norm 7.23930444e-08, work = 3.000
|
|
||||||
iter 12, level 7, residual norm 3.56346335e-08, work = 6.188
|
|
||||||
iter 2, level 7, residual norm 5.93080936e-08, work = 2.000
|
|
||||||
iter 4, level 7, residual norm 8.54596942e-08, work = 4.000
|
|
||||||
iter 11, level 7, residual norm 3.70162668e-08, work = 6.125
|
|
||||||
iter 13, level 7, residual norm 3.34750526e-08, work = 7.188
|
|
||||||
iter 12, level 7, residual norm 2.45353138e-08, work = 6.188
|
|
||||||
|
|
||||||
PROCESS STATISTICS
|
|
||||||
Total Multigrid Multigrid
|
|
||||||
Proc Time Time Fraction
|
|
||||||
0 50030404 20050068 0.401
|
|
||||||
Avg 50030404 20050068 0.401
|
|
||||||
Min 50030404 20050068 0.401
|
|
||||||
Max 50030404 20050068 0.401
|
|
||||||
|
|
||||||
TIMING INFORMATION
|
|
||||||
Start time : 1114891426
|
|
||||||
Initialization finish time : 1152301729
|
|
||||||
Overall finish time : 1202332135
|
|
||||||
Total time with initialization : 87440709
|
|
||||||
Total time without initialization : 50030406
|
|
||||||
(excludes first timestep)
|
|
||||||
|
|
|
@ -1,228 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
#define MASTER 0
|
|
||||||
#define RED_ITER 0
|
|
||||||
#define BLACK_ITER 1
|
|
||||||
/* Direction codes.  They are used as indices into the eight-entry
   neighbors[] array of struct Global_Private (e.g. gp[pid].neighbors[UP]);
   an entry of -1 there is tested for as "no neighbor in that direction". */
#define UP 0
|
|
||||||
#define DOWN 1
|
|
||||||
#define LEFT 2
|
|
||||||
#define RIGHT 3
|
|
||||||
#define UPLEFT 4
|
|
||||||
#define UPRIGHT 5
|
|
||||||
#define DOWNLEFT 6
|
|
||||||
#define DOWNRIGHT 7
|
|
||||||
#define PAGE_SIZE 4096
|
|
||||||
|
|
||||||
struct multi_struct {
|
|
||||||
double err_multi;
|
|
||||||
};
|
|
||||||
|
|
||||||
extern struct multi_struct *multi;
|
|
||||||
|
|
||||||
struct global_struct {
|
|
||||||
long id;
|
|
||||||
long starttime;
|
|
||||||
long trackstart;
|
|
||||||
double psiai;
|
|
||||||
double psibi;
|
|
||||||
};
|
|
||||||
|
|
||||||
extern struct global_struct *global;
|
|
||||||
|
|
||||||
extern double eig2;
|
|
||||||
extern double ysca;
|
|
||||||
extern long jmm1;
|
|
||||||
extern double pi;
|
|
||||||
extern double t0;
|
|
||||||
|
|
||||||
extern double ****psi;
|
|
||||||
extern double ****psim;
|
|
||||||
extern double ***psium;
|
|
||||||
extern double ***psilm;
|
|
||||||
extern double ***psib;
|
|
||||||
extern double ***ga;
|
|
||||||
extern double ***gb;
|
|
||||||
extern double ****work1;
|
|
||||||
extern double ***work2;
|
|
||||||
extern double ***work3;
|
|
||||||
extern double ****work4;
|
|
||||||
extern double ****work5;
|
|
||||||
extern double ***work6;
|
|
||||||
extern double ****work7;
|
|
||||||
extern double ****temparray;
|
|
||||||
extern double ***tauz;
|
|
||||||
extern double ***oldga;
|
|
||||||
extern double ***oldgb;
|
|
||||||
extern double *f;
|
|
||||||
extern double ****q_multi;
|
|
||||||
extern double ****rhs_multi;
|
|
||||||
|
|
||||||
struct locks_struct {
|
|
||||||
LOCKDEC(idlock)
|
|
||||||
LOCKDEC(psiailock)
|
|
||||||
LOCKDEC(psibilock)
|
|
||||||
LOCKDEC(donelock)
|
|
||||||
LOCKDEC(error_lock)
|
|
||||||
LOCKDEC(bar_lock)
|
|
||||||
};
|
|
||||||
|
|
||||||
extern struct locks_struct *locks;
|
|
||||||
|
|
||||||
struct bars_struct {
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARDEC(iteration)
|
|
||||||
BARDEC(gsudn)
|
|
||||||
BARDEC(p_setup)
|
|
||||||
BARDEC(p_redph)
|
|
||||||
BARDEC(p_soln)
|
|
||||||
BARDEC(p_subph)
|
|
||||||
BARDEC(sl_prini)
|
|
||||||
BARDEC(sl_psini)
|
|
||||||
BARDEC(sl_onetime)
|
|
||||||
BARDEC(sl_phase_1)
|
|
||||||
BARDEC(sl_phase_2)
|
|
||||||
BARDEC(sl_phase_3)
|
|
||||||
BARDEC(sl_phase_4)
|
|
||||||
BARDEC(sl_phase_5)
|
|
||||||
BARDEC(sl_phase_6)
|
|
||||||
BARDEC(sl_phase_7)
|
|
||||||
BARDEC(sl_phase_8)
|
|
||||||
BARDEC(sl_phase_9)
|
|
||||||
BARDEC(sl_phase_10)
|
|
||||||
BARDEC(error_barrier)
|
|
||||||
#else
|
|
||||||
BARDEC(barrier)
|
|
||||||
#endif
|
|
||||||
};
|
|
||||||
|
|
||||||
extern struct bars_struct *bars;
|
|
||||||
|
|
||||||
extern double factjacob;
|
|
||||||
extern double factlap;
|
|
||||||
|
|
||||||
/* Per-processor record.  The global pointer `gp` is indexed by processor
   id (gp[pid]) to reach one of these. */
struct Global_Private {
|
|
||||||
/* PAGE_SIZE bytes of leading padding.
   NOTE(review): presumably keeps each processor's record on separate
   pages -- confirm. */
char pad[PAGE_SIZE];
|
|
||||||
/* NOTE(review): the meaning of the following index arrays is not visible
   from this header; document them from the code that fills them in. */
long *rel_num_x;
|
|
||||||
long *rel_num_y;
|
|
||||||
long *eist;
|
|
||||||
long *ejst;
|
|
||||||
long *oist;
|
|
||||||
long *ojst;
|
|
||||||
long *rlist;
|
|
||||||
long *rljst;
|
|
||||||
long *rlien;
|
|
||||||
long *rljen;
|
|
||||||
long rownum;
|
|
||||||
long colnum;
|
|
||||||
/* Ids of the adjacent processors, indexed by UP, DOWN, LEFT, RIGHT,
   UPLEFT, UPRIGHT, DOWNLEFT and DOWNRIGHT; callers compare an entry
   against -1 to detect a missing neighbor. */
long neighbors[8];
|
|
||||||
/* NOTE(review): presumably accumulated multigrid-solver time and total
   time for the statistics printout -- confirm. */
double multi_time;
|
|
||||||
double total_time;
|
|
||||||
};
|
|
||||||
|
|
||||||
extern struct Global_Private *gp;
|
|
||||||
|
|
||||||
extern double *i_int_coeff;
|
|
||||||
extern double *j_int_coeff;
|
|
||||||
extern long xprocs;
|
|
||||||
extern long yprocs;
|
|
||||||
|
|
||||||
extern long numlev;
|
|
||||||
extern long *imx;
|
|
||||||
extern long *jmx;
|
|
||||||
extern double *lev_res;
|
|
||||||
extern double *lev_tol;
|
|
||||||
extern double maxwork;
|
|
||||||
extern long *xpts_per_proc;
|
|
||||||
extern long *ypts_per_proc;
|
|
||||||
extern long minlevel;
|
|
||||||
extern double outday0;
|
|
||||||
extern double outday1;
|
|
||||||
extern double outday2;
|
|
||||||
extern double outday3;
|
|
||||||
|
|
||||||
extern long nprocs;
|
|
||||||
extern double h1;
|
|
||||||
extern double h3;
|
|
||||||
extern double h;
|
|
||||||
extern double lf;
|
|
||||||
extern double res;
|
|
||||||
extern double dtau;
|
|
||||||
extern double f0;
|
|
||||||
extern double beta;
|
|
||||||
extern double gpr;
|
|
||||||
extern long im;
|
|
||||||
extern long jm;
|
|
||||||
extern long do_stats;
|
|
||||||
extern long do_output;
|
|
||||||
extern long *multi_times;
|
|
||||||
extern long *total_times;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* jacobcalc.C
|
|
||||||
*/
|
|
||||||
void jacobcalc(double ***x, double ***y, double ***z, long pid, long firstrow, long lastrow, long firstcol, long lastcol);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* jacobcalc2.C
|
|
||||||
*/
|
|
||||||
void jacobcalc2(double ****x, double ****y, double ****z, long psiindex, long pid, long firstrow, long lastrow, long firstcol, long lastcol);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* laplacalc.C
|
|
||||||
*/
|
|
||||||
void laplacalc(long procid, double ****x, double ****z, long psiindex, long firstrow, long lastrow, long firstcol, long lastcol);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* linkup.C
|
|
||||||
*/
|
|
||||||
void link_all(void);
|
|
||||||
void linkup(double **row_ptr);
|
|
||||||
void link_multi(void);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* main.C
|
|
||||||
*/
|
|
||||||
long log_2(long number);
|
|
||||||
void printerr(char *s);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* multi.C
|
|
||||||
*/
|
|
||||||
void multig(long my_id);
|
|
||||||
void relax(long k, double *err, long color, long my_num);
|
|
||||||
void rescal(long kf, long my_num);
|
|
||||||
void intadd(long kc, long my_num);
|
|
||||||
void putz(long k, long my_num);
|
|
||||||
void copy_borders(long k, long pid);
|
|
||||||
void copy_rhs_borders(long k, long procid);
|
|
||||||
void copy_red(long k, long procid);
|
|
||||||
void copy_black(long k, long procid);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* slave1.C
|
|
||||||
*/
|
|
||||||
void slave(void);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* slave2.C
|
|
||||||
*/
|
|
||||||
void slave2(long procid, long firstrow, long lastrow, long numrows, long firstcol, long lastcol, long numcols);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* subblock.C
|
|
||||||
*/
|
|
||||||
void subblock(void);
|
|
|
@ -1,352 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/* Does the Arakawa Jacobian calculation (of the x and y matrices,
|
|
||||||
putting the results in the z matrix) for a subblock. */
|
|
||||||
|
|
||||||
EXTERN_ENV
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include "decs.h"
|
|
||||||
|
|
||||||
void jacobcalc(double ***x, double ***y, double ***z, long pid, long firstrow, long lastrow, long firstcol, long lastcol)
|
|
||||||
{
|
|
||||||
double f1;
|
|
||||||
double f2;
|
|
||||||
double f3;
|
|
||||||
double f4;
|
|
||||||
double f5;
|
|
||||||
double f6;
|
|
||||||
double f7;
|
|
||||||
double f8;
|
|
||||||
long iindex;
|
|
||||||
long indexp1;
|
|
||||||
long indexm1;
|
|
||||||
long im1;
|
|
||||||
long ip1;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long jj;
|
|
||||||
double **t2a;
|
|
||||||
double **t2b;
|
|
||||||
double **t2c;
|
|
||||||
double *t1a;
|
|
||||||
double *t1b;
|
|
||||||
double *t1c;
|
|
||||||
double *t1d;
|
|
||||||
double *t1e;
|
|
||||||
double *t1f;
|
|
||||||
double *t1g;
|
|
||||||
|
|
||||||
t2a = (double **) z[pid];
|
|
||||||
if ((gp[pid].neighbors[UP] == -1) && (gp[pid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[0][0]=0.0;
|
|
||||||
}
|
|
||||||
if ((gp[pid].neighbors[DOWN] == -1) && (gp[pid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[im-1][0]=0.0;
|
|
||||||
}
|
|
||||||
if ((gp[pid].neighbors[UP] == -1) && (gp[pid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[0][jm-1]=0.0;
|
|
||||||
}
|
|
||||||
if ((gp[pid].neighbors[DOWN] == -1) && (gp[pid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[im-1][jm-1]=0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = (double **) x[pid];
|
|
||||||
jj = gp[pid].neighbors[UPLEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0]=x[jj][im-2][jm-2];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[UPRIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1]=x[jj][im-2][1];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWNLEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0]=x[jj][1][jm-2];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWNRIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1]=x[jj][1][1];
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = (double **) y[pid];
|
|
||||||
jj = gp[pid].neighbors[UPLEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0]=y[jj][im-2][jm-2];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[UPRIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1]=y[jj][im-2][1];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWNLEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0]=y[jj][1][jm-2];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWNRIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1]=y[jj][1][1];
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = (double **) x[pid];
|
|
||||||
if (gp[pid].neighbors[UP] == -1) {
|
|
||||||
jj = gp[pid].neighbors[LEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = x[jj][0][jm-2];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = x[jj][1][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[RIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = x[jj][0][1];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = x[jj][1][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[DOWN] == -1) {
|
|
||||||
jj = gp[pid].neighbors[LEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = x[jj][im-1][jm-2];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = x[jj][im-2][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[RIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = x[jj][im-1][1];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = x[jj][im-2][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[LEFT] == -1) {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = x[jj][im-2][0];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = x[jj][1][0];
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[RIGHT] == -1) {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = x[jj][im-2][jm-1];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = x[jj][1][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = (double **) y[pid];
|
|
||||||
if (gp[pid].neighbors[UP] == -1) {
|
|
||||||
jj = gp[pid].neighbors[LEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = y[jj][0][jm-2];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = y[jj][1][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[RIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = y[jj][0][1];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = y[jj][1][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[DOWN] == -1) {
|
|
||||||
jj = gp[pid].neighbors[LEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = y[jj][im-1][jm-2];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = y[jj][im-2][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[RIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = y[jj][im-1][1];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = y[jj][im-2][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[LEFT] == -1) {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = y[jj][im-2][0];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = y[jj][1][0];
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[RIGHT] == -1) {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = y[jj][im-2][jm-1];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = y[jj][1][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
j = gp[pid].neighbors[UP];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) y[j][im-2];
|
|
||||||
for (i=1;i<=lastcol;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[DOWN];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
t1b = (double *) y[j][1];
|
|
||||||
for (i=1;i<=lastcol;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[LEFT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) y[j];
|
|
||||||
for (i=1;i<=lastrow;i++) {
|
|
||||||
t2a[i][0] = t2b[i][jm-2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[RIGHT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) y[j];
|
|
||||||
for (i=1;i<=lastrow;i++) {
|
|
||||||
t2a[i][jm-1] = t2b[i][1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = (double **) x[pid];
|
|
||||||
j = gp[pid].neighbors[UP];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) x[j][im-2];
|
|
||||||
for (i=1;i<=lastcol;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[DOWN];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
t1b = (double *) x[j][1];
|
|
||||||
for (i=1;i<=lastcol;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[LEFT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) x[j];
|
|
||||||
for (i=1;i<=lastrow;i++) {
|
|
||||||
t2a[i][0] = t2b[i][jm-2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[RIGHT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) x[j];
|
|
||||||
for (i=1;i<=lastrow;i++) {
|
|
||||||
t2a[i][jm-1] = t2b[i][1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = (double **) x[pid];
|
|
||||||
t2b = (double **) y[pid];
|
|
||||||
t2c = (double **) z[pid];
|
|
||||||
for (i=firstrow;i<=lastrow;i++) {
|
|
||||||
ip1 = i+1;
|
|
||||||
im1 = i-1;
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
t1c = (double *) t2c[i];
|
|
||||||
t1d = (double *) t2b[ip1];
|
|
||||||
t1e = (double *) t2b[im1];
|
|
||||||
t1f = (double *) t2a[ip1];
|
|
||||||
t1g = (double *) t2a[im1];
|
|
||||||
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
indexp1 = iindex+1;
|
|
||||||
indexm1 = iindex-1;
|
|
||||||
f1 = (t1b[indexm1]+t1d[indexm1]-
|
|
||||||
t1b[indexp1]-t1d[indexp1])*
|
|
||||||
(t1f[iindex]-t1a[iindex]);
|
|
||||||
f2 = (t1e[indexm1]+t1b[indexm1]-
|
|
||||||
t1e[indexp1]-t1b[indexp1])*
|
|
||||||
(t1a[iindex]-t1g[iindex]);
|
|
||||||
f3 = (t1d[iindex]+t1d[indexp1]-
|
|
||||||
t1e[iindex]-t1e[indexp1])*
|
|
||||||
(t1a[indexp1]-t1a[iindex]);
|
|
||||||
f4 = (t1d[indexm1]+t1d[iindex]-
|
|
||||||
t1e[indexm1]-t1e[iindex])*
|
|
||||||
(t1a[iindex]-t1a[indexm1]);
|
|
||||||
f5 = (t1d[iindex]-t1b[indexp1])*
|
|
||||||
(t1f[indexp1]-t1a[iindex]);
|
|
||||||
f6 = (t1b[indexm1]-t1e[iindex])*
|
|
||||||
(t1a[iindex]-t1g[indexm1]);
|
|
||||||
f7 = (t1b[indexp1]-t1e[iindex])*
|
|
||||||
(t1g[indexp1]-t1a[iindex]);
|
|
||||||
f8 = (t1d[iindex]-t1b[indexm1])*
|
|
||||||
(t1a[iindex]-t1f[indexm1]);
|
|
||||||
|
|
||||||
t1c[iindex] = factjacob*(f1+f2+f3+f4+f5+f6+f7+f8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (gp[pid].neighbors[UP] == -1) {
|
|
||||||
t1c = (double *) t2c[0];
|
|
||||||
for (j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1c[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[pid].neighbors[DOWN] == -1) {
|
|
||||||
t1c = (double *) t2c[im-1];
|
|
||||||
for (j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1c[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[pid].neighbors[LEFT] == -1) {
|
|
||||||
for (j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2c[j][0] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[pid].neighbors[RIGHT] == -1) {
|
|
||||||
for (j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2c[j][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,354 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/* Does the Arakawa Jacobian calculation (of the x and y matrices,
|
|
||||||
putting the results in the z matrix) for a subblock. */
|
|
||||||
|
|
||||||
EXTERN_ENV
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include "decs.h"
|
|
||||||
|
|
||||||
void jacobcalc2(double ****x, double ****y, double ****z, long psiindex, long pid, long firstrow, long lastrow, long firstcol, long lastcol)
|
|
||||||
{
|
|
||||||
double f1;
|
|
||||||
double f2;
|
|
||||||
double f3;
|
|
||||||
double f4;
|
|
||||||
double f5;
|
|
||||||
double f6;
|
|
||||||
double f7;
|
|
||||||
double f8;
|
|
||||||
long iindex;
|
|
||||||
long indexp1;
|
|
||||||
long indexm1;
|
|
||||||
long im1;
|
|
||||||
long ip1;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long jj;
|
|
||||||
double **t2a;
|
|
||||||
double **t2b;
|
|
||||||
double **t2c;
|
|
||||||
double *t1a;
|
|
||||||
double *t1b;
|
|
||||||
double *t1c;
|
|
||||||
double *t1d;
|
|
||||||
double *t1e;
|
|
||||||
double *t1f;
|
|
||||||
double *t1g;
|
|
||||||
|
|
||||||
t2a = z[pid][psiindex];
|
|
||||||
if ((gp[pid].neighbors[UP] == -1) && (gp[pid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[0][0]=0.0;
|
|
||||||
}
|
|
||||||
if ((gp[pid].neighbors[DOWN] == -1) && (gp[pid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[im-1][0]=0.0;
|
|
||||||
}
|
|
||||||
if ((gp[pid].neighbors[UP] == -1) && (gp[pid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[0][jm-1]=0.0;
|
|
||||||
}
|
|
||||||
if ((gp[pid].neighbors[DOWN] == -1) && (gp[pid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[im-1][jm-1]=0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = x[pid][psiindex];
|
|
||||||
jj = gp[pid].neighbors[UPLEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0]=x[jj][psiindex][im-2][jm-2];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[UPRIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1]=x[jj][psiindex][im-2][1];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWNLEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0]=x[jj][psiindex][1][jm-2];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWNRIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1]=x[jj][psiindex][1][1];
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = y[pid][psiindex];
|
|
||||||
jj = gp[pid].neighbors[UPLEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0]=y[jj][psiindex][im-2][jm-2];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[UPRIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1]=y[jj][psiindex][im-2][1];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWNLEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0]=y[jj][psiindex][1][jm-2];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWNRIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1]=y[jj][psiindex][1][1];
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = x[pid][psiindex];
|
|
||||||
if (gp[pid].neighbors[UP] == -1) {
|
|
||||||
jj = gp[pid].neighbors[LEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = x[jj][psiindex][0][jm-2];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = x[jj][psiindex][1][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[RIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = x[jj][psiindex][0][1];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = x[jj][psiindex][1][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[DOWN] == -1) {
|
|
||||||
jj = gp[pid].neighbors[LEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = x[jj][psiindex][im-1][jm-2];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = x[jj][psiindex][im-2][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[RIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = x[jj][psiindex][im-1][1];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = x[jj][psiindex][im-2][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[LEFT] == -1) {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = x[jj][psiindex][im-2][0];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = x[jj][psiindex][1][0];
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[RIGHT] == -1) {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = x[jj][psiindex][im-2][jm-1];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = x[jj][psiindex][1][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = y[pid][psiindex];
|
|
||||||
if (gp[pid].neighbors[UP] == -1) {
|
|
||||||
jj = gp[pid].neighbors[LEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = y[jj][psiindex][0][jm-2];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = y[jj][psiindex][1][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[RIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = y[jj][psiindex][0][1];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = y[jj][psiindex][1][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[DOWN] == -1) {
|
|
||||||
jj = gp[pid].neighbors[LEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = y[jj][psiindex][im-1][jm-2];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = y[jj][psiindex][im-2][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[RIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = y[jj][psiindex][im-1][1];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = y[jj][psiindex][im-2][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[LEFT] == -1) {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = y[jj][psiindex][im-2][0];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = y[jj][psiindex][1][0];
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[RIGHT] == -1) {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = y[jj][psiindex][im-2][jm-1];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = y[jj][psiindex][1][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = y[pid][psiindex];
|
|
||||||
j = gp[pid].neighbors[UP];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) y[j][psiindex][im-2];
|
|
||||||
for (i=1;i<=lastcol;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[DOWN];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
t1b = (double *) y[j][psiindex][1];
|
|
||||||
for (i=1;i<=lastcol;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[LEFT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = y[j][psiindex];
|
|
||||||
for (i=1;i<=lastrow;i++) {
|
|
||||||
t2a[i][0] = t2b[i][jm-2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[RIGHT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = y[j][psiindex];
|
|
||||||
for (i=1;i<=lastrow;i++) {
|
|
||||||
t2a[i][jm-1] = t2b[i][1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = x[pid][psiindex];
|
|
||||||
j = gp[pid].neighbors[UP];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) x[j][psiindex][im-2];
|
|
||||||
for (i=1;i<=lastcol;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[DOWN];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
t1b = (double *) x[j][psiindex][1];
|
|
||||||
for (i=1;i<=lastcol;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[LEFT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = x[j][psiindex];
|
|
||||||
for (i=1;i<=lastrow;i++) {
|
|
||||||
t2a[i][0] = t2b[i][jm-2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[RIGHT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = x[j][psiindex];
|
|
||||||
for (i=1;i<=lastrow;i++) {
|
|
||||||
t2a[i][jm-1] = t2b[i][1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = x[pid][psiindex];
|
|
||||||
t2b = y[pid][psiindex];
|
|
||||||
t2c = z[pid][psiindex];
|
|
||||||
for (i=firstrow;i<=lastrow;i++) {
|
|
||||||
ip1 = i+1;
|
|
||||||
im1 = i-1;
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
t1c = (double *) t2c[i];
|
|
||||||
t1d = (double *) t2b[ip1];
|
|
||||||
t1e = (double *) t2b[im1];
|
|
||||||
t1f = (double *) t2a[ip1];
|
|
||||||
t1g = (double *) t2a[im1];
|
|
||||||
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
indexp1 = iindex+1;
|
|
||||||
indexm1 = iindex-1;
|
|
||||||
f1 = (t1b[indexm1]+t1d[indexm1]-
|
|
||||||
t1b[indexp1]-t1d[indexp1])*
|
|
||||||
(t1f[iindex]-t1a[iindex]);
|
|
||||||
f2 = (t1e[indexm1]+t1b[indexm1]-
|
|
||||||
t1e[indexp1]-t1b[indexp1])*
|
|
||||||
(t1a[iindex]-t1g[iindex]);
|
|
||||||
f3 = (t1d[iindex]+t1d[indexp1]-
|
|
||||||
t1e[iindex]-t1e[indexp1])*
|
|
||||||
(t1a[indexp1]-t1a[iindex]);
|
|
||||||
f4 = (t1d[indexm1]+t1d[iindex]-
|
|
||||||
t1e[indexm1]-t1e[iindex])*
|
|
||||||
(t1a[iindex]-t1a[indexm1]);
|
|
||||||
f5 = (t1d[iindex]-t1b[indexp1])*
|
|
||||||
(t1f[indexp1]-t1a[iindex]);
|
|
||||||
f6 = (t1b[indexm1]-t1e[iindex])*
|
|
||||||
(t1a[iindex]-t1g[indexm1]);
|
|
||||||
f7 = (t1b[indexp1]-t1e[iindex])*
|
|
||||||
(t1g[indexp1]-t1a[iindex]);
|
|
||||||
f8 = (t1d[iindex]-t1b[indexm1])*
|
|
||||||
(t1a[iindex]-t1f[indexm1]);
|
|
||||||
|
|
||||||
t1c[iindex] = factjacob*(f1+f2+f3+f4+f5+f6+f7+f8);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (gp[pid].neighbors[UP] == -1) {
|
|
||||||
t1c = (double *) t2c[0];
|
|
||||||
for (j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1c[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[pid].neighbors[DOWN] == -1) {
|
|
||||||
t1c = (double *) t2c[im-1];
|
|
||||||
for (j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1c[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[pid].neighbors[LEFT] == -1) {
|
|
||||||
for (j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2c[j][0] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[pid].neighbors[RIGHT] == -1) {
|
|
||||||
for (j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2c[j][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,115 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/* Performs the laplacian calculation for a subblock */
|
|
||||||
|
|
||||||
EXTERN_ENV
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include "decs.h"
|
|
||||||
|
|
||||||
void laplacalc(long procid, double ****x, double ****z, long psiindex, long firstrow, long lastrow, long firstcol, long lastcol)
|
|
||||||
{
|
|
||||||
long iindex;
|
|
||||||
long indexp1;
|
|
||||||
long indexm1;
|
|
||||||
long ip1;
|
|
||||||
long im1;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
double **t2a;
|
|
||||||
double **t2b;
|
|
||||||
double *t1a;
|
|
||||||
double *t1b;
|
|
||||||
double *t1c;
|
|
||||||
double *t1d;
|
|
||||||
|
|
||||||
t2a = (double **) x[procid][psiindex];
|
|
||||||
j = gp[procid].neighbors[UP];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) x[j][psiindex][im-2];
|
|
||||||
for (i=1;i<=lastcol;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[procid].neighbors[DOWN];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
t1b = (double *) x[j][psiindex][1];
|
|
||||||
for (i=1;i<=lastcol;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[procid].neighbors[LEFT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) x[j][psiindex];
|
|
||||||
for (i=1;i<=lastrow;i++) {
|
|
||||||
t2a[i][0] = t2b[i][jm-2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[procid].neighbors[RIGHT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) x[j][psiindex];
|
|
||||||
for (i=1;i<=lastrow;i++) {
|
|
||||||
t2a[i][jm-1] = t2b[i][1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = (double **) x[procid][psiindex];
|
|
||||||
t2b = (double **) z[procid][psiindex];
|
|
||||||
for (i=firstrow;i<=lastrow;i++) {
|
|
||||||
ip1 = i+1;
|
|
||||||
im1 = i-1;
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
t1c = (double *) t2a[ip1];
|
|
||||||
t1d = (double *) t2a[im1];
|
|
||||||
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
indexp1 = iindex+1;
|
|
||||||
indexm1 = iindex-1;
|
|
||||||
t1b[iindex] = factlap*(t1c[iindex]+
|
|
||||||
t1d[iindex]+t1a[indexp1]+
|
|
||||||
t1a[indexm1]-4.*t1a[iindex]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (gp[procid].neighbors[UP] == -1) {
|
|
||||||
t1b = (double *) t2b[0];
|
|
||||||
for (j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1b[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[DOWN] == -1) {
|
|
||||||
t1b = (double *) t2b[im-1];
|
|
||||||
for (j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1b[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] == -1) {
|
|
||||||
for (j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2b[j][0] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
||||||
for (j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2b[j][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
Binary file not shown.
|
@ -1,196 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/* Set all the pointers to the proper locations for the q_multi and
|
|
||||||
rhs_multi data structures */
|
|
||||||
|
|
||||||
EXTERN_ENV
|
|
||||||
|
|
||||||
#include "decs.h"
|
|
||||||
|
|
||||||
void link_all()
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
for (j=0;j<nprocs;j++) {
|
|
||||||
linkup(psium[j]);
|
|
||||||
linkup(psilm[j]);
|
|
||||||
linkup(psib[j]);
|
|
||||||
linkup(ga[j]);
|
|
||||||
linkup(gb[j]);
|
|
||||||
linkup(work2[j]);
|
|
||||||
linkup(work3[j]);
|
|
||||||
linkup(work6[j]);
|
|
||||||
linkup(tauz[j]);
|
|
||||||
linkup(oldga[j]);
|
|
||||||
linkup(oldgb[j]);
|
|
||||||
for (i=0;i<=1;i++) {
|
|
||||||
linkup(psi[j][i]);
|
|
||||||
linkup(psim[j][i]);
|
|
||||||
linkup(work1[j][i]);
|
|
||||||
linkup(work4[j][i]);
|
|
||||||
linkup(work5[j][i]);
|
|
||||||
linkup(work7[j][i]);
|
|
||||||
linkup(temparray[j][i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
link_multi();
|
|
||||||
}
|
|
||||||
|
|
||||||
void linkup(double **row_ptr)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
double *a;
|
|
||||||
double **row;
|
|
||||||
double **y;
|
|
||||||
long x_part;
|
|
||||||
long y_part;
|
|
||||||
|
|
||||||
x_part = (jm-2)/xprocs + 2;
|
|
||||||
y_part = (im-2)/yprocs + 2;
|
|
||||||
row = row_ptr;
|
|
||||||
y = row + y_part;
|
|
||||||
a = (double *) y;
|
|
||||||
for (i=0;i<y_part;i++) {
|
|
||||||
*row = (double *) a;
|
|
||||||
row++;
|
|
||||||
a += x_part;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void link_multi()
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long l;
|
|
||||||
double *a;
|
|
||||||
double **row;
|
|
||||||
double **y;
|
|
||||||
unsigned long z;
|
|
||||||
unsigned long zz;
|
|
||||||
long x_part;
|
|
||||||
long y_part;
|
|
||||||
unsigned long d_size;
|
|
||||||
|
|
||||||
z = ((unsigned long) q_multi + nprocs*sizeof(double ***));
|
|
||||||
|
|
||||||
if (nprocs%2 == 1) { /* To make sure that the actual data
|
|
||||||
starts double word aligned, add an extra
|
|
||||||
pointer */
|
|
||||||
z += sizeof(double ***);
|
|
||||||
}
|
|
||||||
|
|
||||||
d_size = numlev*sizeof(double **);
|
|
||||||
if (numlev%2 == 1) { /* To make sure that the actual data
|
|
||||||
starts double word aligned, add an extra
|
|
||||||
pointer */
|
|
||||||
d_size += sizeof(double **);
|
|
||||||
}
|
|
||||||
for (i=0;i<numlev;i++) {
|
|
||||||
d_size += ((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
|
||||||
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
|
||||||
}
|
|
||||||
for (i=0;i<nprocs;i++) {
|
|
||||||
q_multi[i] = (double ***) z;
|
|
||||||
z += d_size;
|
|
||||||
}
|
|
||||||
for (j=0;j<nprocs;j++) {
|
|
||||||
zz = (unsigned long) q_multi[j];
|
|
||||||
zz += numlev*sizeof(double **);
|
|
||||||
if (numlev%2 == 1) { /* To make sure that the actual data
|
|
||||||
starts double word aligned, add an extra
|
|
||||||
pointer */
|
|
||||||
zz += sizeof(double **);
|
|
||||||
}
|
|
||||||
for (i=0;i<numlev;i++) {
|
|
||||||
d_size = ((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
|
||||||
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
|
||||||
q_multi[j][i] = (double **) zz;
|
|
||||||
zz += d_size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (l=0;l<numlev;l++) {
|
|
||||||
x_part = (jmx[l]-2)/xprocs + 2;
|
|
||||||
y_part = (imx[l]-2)/yprocs + 2;
|
|
||||||
for (j=0;j<nprocs;j++) {
|
|
||||||
row = q_multi[j][l];
|
|
||||||
y = row + y_part;
|
|
||||||
a = (double *) y;
|
|
||||||
for (i=0;i<y_part;i++) {
|
|
||||||
*row = (double *) a;
|
|
||||||
row++;
|
|
||||||
a += x_part;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
z = ((unsigned long) rhs_multi + nprocs*sizeof(double ***));
|
|
||||||
if (nprocs%2 == 1) { /* To make sure that the actual data
|
|
||||||
starts double word aligned, add an extra
|
|
||||||
pointer */
|
|
||||||
z += sizeof(double ***);
|
|
||||||
}
|
|
||||||
|
|
||||||
d_size = numlev*sizeof(double **);
|
|
||||||
if (numlev%2 == 1) { /* To make sure that the actual data
|
|
||||||
starts double word aligned, add an extra
|
|
||||||
pointer */
|
|
||||||
d_size += sizeof(double **);
|
|
||||||
}
|
|
||||||
for (i=0;i<numlev;i++) {
|
|
||||||
d_size += ((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
|
||||||
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
|
||||||
}
|
|
||||||
for (i=0;i<nprocs;i++) {
|
|
||||||
rhs_multi[i] = (double ***) z;
|
|
||||||
z += d_size;
|
|
||||||
}
|
|
||||||
for (j=0;j<nprocs;j++) {
|
|
||||||
zz = (unsigned long) rhs_multi[j];
|
|
||||||
zz += numlev*sizeof(double **);
|
|
||||||
if (numlev%2 == 1) { /* To make sure that the actual data
|
|
||||||
starts double word aligned, add an extra
|
|
||||||
pointer */
|
|
||||||
zz += sizeof(double **);
|
|
||||||
}
|
|
||||||
for (i=0;i<numlev;i++) {
|
|
||||||
d_size = ((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
|
||||||
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
|
||||||
rhs_multi[j][i] = (double **) zz;
|
|
||||||
zz += d_size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (l=0;l<numlev;l++) {
|
|
||||||
x_part = (jmx[l]-2)/xprocs + 2;
|
|
||||||
y_part = (imx[l]-2)/yprocs + 2;
|
|
||||||
for (j=0;j<nprocs;j++) {
|
|
||||||
row = rhs_multi[j][l];
|
|
||||||
y = row + y_part;
|
|
||||||
a = (double *) y;
|
|
||||||
for (i=0;i<y_part;i++) {
|
|
||||||
*row = (double *) a;
|
|
||||||
row++;
|
|
||||||
a += x_part;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -1,566 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* SPLASH Ocean Code */
|
|
||||||
/* */
|
|
||||||
/* This application studies the role of eddy and boundary currents in */
|
|
||||||
/* influencing large-scale ocean movements. This implementation uses */
|
|
||||||
/* dynamically allocated four-dimensional arrays for grid data storage. */
|
|
||||||
/* */
|
|
||||||
/* Command line options: */
|
|
||||||
/* */
|
|
||||||
/* -nN : Simulate NxN ocean. N must be (power of 2)+2. */
|
|
||||||
/* -pP : P = number of processors. P must be power of 2. */
|
|
||||||
/* -eE : E = error tolerance for iterative relaxation. */
|
|
||||||
/* -rR : R = distance between grid points in meters. */
|
|
||||||
/* -tT : T = timestep in seconds. */
|
|
||||||
/* -s : Print timing statistics. */
|
|
||||||
/* -o : Print out relaxation residual values. */
|
|
||||||
/* -h : Print out command line options. */
|
|
||||||
/* */
|
|
||||||
/* Default: OCEAN -n258 -p1 -e1e-7 -r20000.0 -t28800.0 */
|
|
||||||
/* */
|
|
||||||
/* NOTE: This code works under both the FORK and SPROC models. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
MAIN_ENV
|
|
||||||
|
|
||||||
#define DEFAULT_N 258
|
|
||||||
#define DEFAULT_P 1
|
|
||||||
#define DEFAULT_E 1e-7
|
|
||||||
#define DEFAULT_T 28800.0
|
|
||||||
#define DEFAULT_R 20000.0
|
|
||||||
#define UP 0
|
|
||||||
#define DOWN 1
|
|
||||||
#define LEFT 2
|
|
||||||
#define RIGHT 3
|
|
||||||
#define UPLEFT 4
|
|
||||||
#define UPRIGHT 5
|
|
||||||
#define DOWNLEFT 6
|
|
||||||
#define DOWNRIGHT 7
|
|
||||||
#define PAGE_SIZE 4096
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include "decs.h"
|
|
||||||
|
|
||||||
struct multi_struct *multi;
|
|
||||||
struct global_struct *global;
|
|
||||||
struct locks_struct *locks;
|
|
||||||
struct bars_struct *bars;
|
|
||||||
|
|
||||||
double ****psi;
|
|
||||||
double ****psim;
|
|
||||||
double ***psium;
|
|
||||||
double ***psilm;
|
|
||||||
double ***psib;
|
|
||||||
double ***ga;
|
|
||||||
double ***gb;
|
|
||||||
double ****work1;
|
|
||||||
double ***work2;
|
|
||||||
double ***work3;
|
|
||||||
double ****work4;
|
|
||||||
double ****work5;
|
|
||||||
double ***work6;
|
|
||||||
double ****work7;
|
|
||||||
double ****temparray;
|
|
||||||
double ***tauz;
|
|
||||||
double ***oldga;
|
|
||||||
double ***oldgb;
|
|
||||||
double *f;
|
|
||||||
double ****q_multi;
|
|
||||||
double ****rhs_multi;
|
|
||||||
|
|
||||||
long nprocs = DEFAULT_P;
|
|
||||||
double h1 = 1000.0;
|
|
||||||
double h3 = 4000.0;
|
|
||||||
double h = 5000.0;
|
|
||||||
double lf = -5.12e11;
|
|
||||||
double res = DEFAULT_R;
|
|
||||||
double dtau = DEFAULT_T;
|
|
||||||
double f0 = 8.3e-5;
|
|
||||||
double beta = 2.0e-11;
|
|
||||||
double gpr = 0.02;
|
|
||||||
long im = DEFAULT_N;
|
|
||||||
long jm;
|
|
||||||
double tolerance = DEFAULT_E;
|
|
||||||
double eig2;
|
|
||||||
double ysca;
|
|
||||||
long jmm1;
|
|
||||||
double pi;
|
|
||||||
double t0 = 0.5e-4 ;
|
|
||||||
double outday0 = 1.0;
|
|
||||||
double outday1 = 2.0;
|
|
||||||
double outday2 = 2.0;
|
|
||||||
double outday3 = 2.0;
|
|
||||||
double factjacob;
|
|
||||||
double factlap;
|
|
||||||
long numlev;
|
|
||||||
long *imx;
|
|
||||||
long *jmx;
|
|
||||||
double *lev_res;
|
|
||||||
double *lev_tol;
|
|
||||||
double maxwork = 10000.0;
|
|
||||||
|
|
||||||
struct Global_Private *gp;
|
|
||||||
|
|
||||||
double *i_int_coeff;
|
|
||||||
double *j_int_coeff;
|
|
||||||
long xprocs;
|
|
||||||
long yprocs;
|
|
||||||
long *xpts_per_proc;
|
|
||||||
long *ypts_per_proc;
|
|
||||||
long minlevel;
|
|
||||||
long do_stats = 0;
|
|
||||||
long do_output = 0;
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long k;
|
|
||||||
long x_part;
|
|
||||||
long y_part;
|
|
||||||
long d_size;
|
|
||||||
long itemp;
|
|
||||||
long jtemp;
|
|
||||||
double procsqrt;
|
|
||||||
long temp = 0;
|
|
||||||
double min_total;
|
|
||||||
double max_total;
|
|
||||||
double avg_total;
|
|
||||||
double min_multi;
|
|
||||||
double max_multi;
|
|
||||||
double avg_multi;
|
|
||||||
double min_frac;
|
|
||||||
double max_frac;
|
|
||||||
double avg_frac;
|
|
||||||
long ch;
|
|
||||||
extern char *optarg;
|
|
||||||
unsigned long computeend;
|
|
||||||
unsigned long start;
|
|
||||||
|
|
||||||
CLOCK(start)
|
|
||||||
|
|
||||||
while ((ch = getopt(argc, argv, "n:p:e:r:t:soh")) != -1) {
|
|
||||||
switch(ch) {
|
|
||||||
case 'n': im = atoi(optarg);
|
|
||||||
if (log_2(im-2) == -1) {
|
|
||||||
printerr("Grid must be ((power of 2)+2) in each dimension\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 'p': nprocs = atoi(optarg);
|
|
||||||
if (nprocs < 1) {
|
|
||||||
printerr("P must be >= 1\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
if (log_2(nprocs) == -1) {
|
|
||||||
printerr("P must be a power of 2\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 'e': tolerance = atof(optarg); break;
|
|
||||||
case 'r': res = atof(optarg); break;
|
|
||||||
case 't': dtau = atof(optarg); break;
|
|
||||||
case 's': do_stats = !do_stats; break;
|
|
||||||
case 'o': do_output = !do_output; break;
|
|
||||||
case 'h': printf("Usage: OCEAN <options>\n\n");
|
|
||||||
printf("options:\n");
|
|
||||||
printf(" -nN : Simulate NxN ocean. N must be (power of 2)+2.\n");
|
|
||||||
printf(" -pP : P = number of processors. P must be power of 2.\n");
|
|
||||||
printf(" -eE : E = error tolerance for iterative relaxation.\n");
|
|
||||||
printf(" -rR : R = distance between grid points in meters.\n");
|
|
||||||
printf(" -tT : T = timestep in seconds.\n");
|
|
||||||
printf(" -s : Print timing statistics.\n");
|
|
||||||
printf(" -o : Print out relaxation residual values.\n");
|
|
||||||
printf(" -h : Print out command line options.\n\n");
|
|
||||||
printf("Default: OCEAN -n%1d -p%1d -e%1g -r%1g -t%1g\n",
|
|
||||||
DEFAULT_N,DEFAULT_P,DEFAULT_E,DEFAULT_R,DEFAULT_T);
|
|
||||||
exit(0);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MAIN_INITENV(,60000000)
|
|
||||||
|
|
||||||
jm = im;
|
|
||||||
printf("\n");
|
|
||||||
printf("Ocean simulation with W-cycle multigrid solver\n");
|
|
||||||
printf(" Processors : %1ld\n",nprocs);
|
|
||||||
printf(" Grid size : %1ld x %1ld\n",im,jm);
|
|
||||||
printf(" Grid resolution (meters) : %0.2f\n",res);
|
|
||||||
printf(" Time between relaxations (seconds) : %0.0f\n",dtau);
|
|
||||||
printf(" Error tolerance : %0.7g\n",tolerance);
|
|
||||||
printf("\n");
|
|
||||||
|
|
||||||
xprocs = 0;
|
|
||||||
yprocs = 0;
|
|
||||||
procsqrt = sqrt((double) nprocs);
|
|
||||||
j = (long) procsqrt;
|
|
||||||
while ((xprocs == 0) && (j > 0)) {
|
|
||||||
k = nprocs / j;
|
|
||||||
if (k * j == nprocs) {
|
|
||||||
if (k > j) {
|
|
||||||
xprocs = j;
|
|
||||||
yprocs = k;
|
|
||||||
} else {
|
|
||||||
xprocs = k;
|
|
||||||
yprocs = j;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j--;
|
|
||||||
}
|
|
||||||
if (xprocs == 0) {
|
|
||||||
printerr("Could not find factors for subblocking\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
minlevel = 0;
|
|
||||||
itemp = 1;
|
|
||||||
jtemp = 1;
|
|
||||||
numlev = 0;
|
|
||||||
minlevel = 0;
|
|
||||||
while (itemp < (im-2)) {
|
|
||||||
itemp = itemp*2;
|
|
||||||
jtemp = jtemp*2;
|
|
||||||
if ((itemp/yprocs > 1) && (jtemp/xprocs > 1)) {
|
|
||||||
numlev++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (numlev == 0) {
|
|
||||||
printerr("Must have at least 2 grid points per processor in each dimension\n");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
imx = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
jmx = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
lev_res = (double *) G_MALLOC(numlev*sizeof(double));
|
|
||||||
lev_tol = (double *) G_MALLOC(numlev*sizeof(double));
|
|
||||||
i_int_coeff = (double *) G_MALLOC(numlev*sizeof(double));
|
|
||||||
j_int_coeff = (double *) G_MALLOC(numlev*sizeof(double));
|
|
||||||
xpts_per_proc = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
ypts_per_proc = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
|
|
||||||
imx[numlev-1] = im;
|
|
||||||
jmx[numlev-1] = jm;
|
|
||||||
lev_res[numlev-1] = res;
|
|
||||||
lev_tol[numlev-1] = tolerance;
|
|
||||||
|
|
||||||
for (i=numlev-2;i>=0;i--) {
|
|
||||||
imx[i] = ((imx[i+1] - 2) / 2) + 2;
|
|
||||||
jmx[i] = ((jmx[i+1] - 2) / 2) + 2;
|
|
||||||
lev_res[i] = lev_res[i+1] * 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i=0;i<numlev;i++) {
|
|
||||||
xpts_per_proc[i] = (jmx[i]-2) / xprocs;
|
|
||||||
ypts_per_proc[i] = (imx[i]-2) / yprocs;
|
|
||||||
}
|
|
||||||
for (i=numlev-1;i>=0;i--) {
|
|
||||||
if ((xpts_per_proc[i] < 2) || (ypts_per_proc[i] < 2)) {
|
|
||||||
minlevel = i+1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i=0;i<numlev;i++) {
|
|
||||||
temp += imx[i];
|
|
||||||
}
|
|
||||||
temp = 0;
|
|
||||||
j = 0;
|
|
||||||
for (k=0;k<numlev;k++) {
|
|
||||||
for (i=0;i<imx[k];i++) {
|
|
||||||
j++;
|
|
||||||
temp += jmx[k];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
d_size = nprocs*sizeof(double ***);
|
|
||||||
psi = (double ****) G_MALLOC(d_size);
|
|
||||||
psim = (double ****) G_MALLOC(d_size);
|
|
||||||
work1 = (double ****) G_MALLOC(d_size);
|
|
||||||
work4 = (double ****) G_MALLOC(d_size);
|
|
||||||
work5 = (double ****) G_MALLOC(d_size);
|
|
||||||
work7 = (double ****) G_MALLOC(d_size);
|
|
||||||
temparray = (double ****) G_MALLOC(d_size);
|
|
||||||
|
|
||||||
d_size = 2*sizeof(double **);
|
|
||||||
for (i=0;i<nprocs;i++) {
|
|
||||||
psi[i] = (double ***) G_MALLOC(d_size);
|
|
||||||
psim[i] = (double ***) G_MALLOC(d_size);
|
|
||||||
work1[i] = (double ***) G_MALLOC(d_size);
|
|
||||||
work4[i] = (double ***) G_MALLOC(d_size);
|
|
||||||
work5[i] = (double ***) G_MALLOC(d_size);
|
|
||||||
work7[i] = (double ***) G_MALLOC(d_size);
|
|
||||||
temparray[i] = (double ***) G_MALLOC(d_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
d_size = nprocs*sizeof(double **);
|
|
||||||
psium = (double ***) G_MALLOC(d_size);
|
|
||||||
psilm = (double ***) G_MALLOC(d_size);
|
|
||||||
psib = (double ***) G_MALLOC(d_size);
|
|
||||||
ga = (double ***) G_MALLOC(d_size);
|
|
||||||
gb = (double ***) G_MALLOC(d_size);
|
|
||||||
work2 = (double ***) G_MALLOC(d_size);
|
|
||||||
work3 = (double ***) G_MALLOC(d_size);
|
|
||||||
work6 = (double ***) G_MALLOC(d_size);
|
|
||||||
tauz = (double ***) G_MALLOC(d_size);
|
|
||||||
oldga = (double ***) G_MALLOC(d_size);
|
|
||||||
oldgb = (double ***) G_MALLOC(d_size);
|
|
||||||
|
|
||||||
gp = (struct Global_Private *) G_MALLOC((nprocs+1)*sizeof(struct Global_Private));
|
|
||||||
for (i=0;i<nprocs;i++) {
|
|
||||||
gp[i].rel_num_x = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
gp[i].rel_num_y = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
gp[i].eist = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
gp[i].ejst = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
gp[i].oist = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
gp[i].ojst = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
gp[i].rlist = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
gp[i].rljst = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
gp[i].rlien = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
gp[i].rljen = (long *) G_MALLOC(numlev*sizeof(long));
|
|
||||||
gp[i].multi_time = 0;
|
|
||||||
gp[i].total_time = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
subblock();
|
|
||||||
|
|
||||||
x_part = (jm - 2)/xprocs + 2;
|
|
||||||
y_part = (im - 2)/yprocs + 2;
|
|
||||||
|
|
||||||
d_size = x_part*y_part*sizeof(double) + y_part*sizeof(double *);
|
|
||||||
|
|
||||||
global = (struct global_struct *) G_MALLOC(sizeof(struct global_struct));
|
|
||||||
for (i=0;i<nprocs;i++) {
|
|
||||||
psi[i][0] = (double **) G_MALLOC(d_size);
|
|
||||||
psi[i][1] = (double **) G_MALLOC(d_size);
|
|
||||||
psim[i][0] = (double **) G_MALLOC(d_size);
|
|
||||||
psim[i][1] = (double **) G_MALLOC(d_size);
|
|
||||||
psium[i] = (double **) G_MALLOC(d_size);
|
|
||||||
psilm[i] = (double **) G_MALLOC(d_size);
|
|
||||||
psib[i] = (double **) G_MALLOC(d_size);
|
|
||||||
ga[i] = (double **) G_MALLOC(d_size);
|
|
||||||
gb[i] = (double **) G_MALLOC(d_size);
|
|
||||||
work1[i][0] = (double **) G_MALLOC(d_size);
|
|
||||||
work1[i][1] = (double **) G_MALLOC(d_size);
|
|
||||||
work2[i] = (double **) G_MALLOC(d_size);
|
|
||||||
work3[i] = (double **) G_MALLOC(d_size);
|
|
||||||
work4[i][0] = (double **) G_MALLOC(d_size);
|
|
||||||
work4[i][1] = (double **) G_MALLOC(d_size);
|
|
||||||
work5[i][0] = (double **) G_MALLOC(d_size);
|
|
||||||
work5[i][1] = (double **) G_MALLOC(d_size);
|
|
||||||
work6[i] = (double **) G_MALLOC(d_size);
|
|
||||||
work7[i][0] = (double **) G_MALLOC(d_size);
|
|
||||||
work7[i][1] = (double **) G_MALLOC(d_size);
|
|
||||||
temparray[i][0] = (double **) G_MALLOC(d_size);
|
|
||||||
temparray[i][1] = (double **) G_MALLOC(d_size);
|
|
||||||
tauz[i] = (double **) G_MALLOC(d_size);
|
|
||||||
oldga[i] = (double **) G_MALLOC(d_size);
|
|
||||||
oldgb[i] = (double **) G_MALLOC(d_size);
|
|
||||||
}
|
|
||||||
f = (double *) G_MALLOC(im*sizeof(double));
|
|
||||||
|
|
||||||
multi = (struct multi_struct *) G_MALLOC(sizeof(struct multi_struct));
|
|
||||||
|
|
||||||
d_size = numlev*sizeof(double **);
|
|
||||||
if (numlev%2 == 1) { /* To make sure that the actual data
|
|
||||||
starts double word aligned, add an extra
|
|
||||||
pointer */
|
|
||||||
d_size += sizeof(double **);
|
|
||||||
}
|
|
||||||
for (i=0;i<numlev;i++) {
|
|
||||||
d_size += ((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
|
||||||
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
|
||||||
}
|
|
||||||
|
|
||||||
d_size *= nprocs;
|
|
||||||
|
|
||||||
if (nprocs%2 == 1) { /* To make sure that the actual data
|
|
||||||
starts double word aligned, add an extra
|
|
||||||
pointer */
|
|
||||||
d_size += sizeof(double ***);
|
|
||||||
}
|
|
||||||
|
|
||||||
d_size += nprocs*sizeof(double ***);
|
|
||||||
q_multi = (double ****) G_MALLOC(d_size);
|
|
||||||
rhs_multi = (double ****) G_MALLOC(d_size);
|
|
||||||
|
|
||||||
locks = (struct locks_struct *) G_MALLOC(sizeof(struct locks_struct));
|
|
||||||
bars = (struct bars_struct *) G_MALLOC(sizeof(struct bars_struct));
|
|
||||||
|
|
||||||
LOCKINIT(locks->idlock)
|
|
||||||
LOCKINIT(locks->psiailock)
|
|
||||||
LOCKINIT(locks->psibilock)
|
|
||||||
LOCKINIT(locks->donelock)
|
|
||||||
LOCKINIT(locks->error_lock)
|
|
||||||
LOCKINIT(locks->bar_lock)
|
|
||||||
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARINIT(bars->iteration, nprocs)
|
|
||||||
BARINIT(bars->gsudn, nprocs)
|
|
||||||
BARINIT(bars->p_setup, nprocs)
|
|
||||||
BARINIT(bars->p_redph, nprocs)
|
|
||||||
BARINIT(bars->p_soln, nprocs)
|
|
||||||
BARINIT(bars->p_subph, nprocs)
|
|
||||||
BARINIT(bars->sl_prini, nprocs)
|
|
||||||
BARINIT(bars->sl_psini, nprocs)
|
|
||||||
BARINIT(bars->sl_onetime, nprocs)
|
|
||||||
BARINIT(bars->sl_phase_1, nprocs)
|
|
||||||
BARINIT(bars->sl_phase_2, nprocs)
|
|
||||||
BARINIT(bars->sl_phase_3, nprocs)
|
|
||||||
BARINIT(bars->sl_phase_4, nprocs)
|
|
||||||
BARINIT(bars->sl_phase_5, nprocs)
|
|
||||||
BARINIT(bars->sl_phase_6, nprocs)
|
|
||||||
BARINIT(bars->sl_phase_7, nprocs)
|
|
||||||
BARINIT(bars->sl_phase_8, nprocs)
|
|
||||||
BARINIT(bars->sl_phase_9, nprocs)
|
|
||||||
BARINIT(bars->sl_phase_10, nprocs)
|
|
||||||
BARINIT(bars->error_barrier, nprocs)
|
|
||||||
#else
|
|
||||||
BARINIT(bars->barrier, nprocs)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
link_all();
|
|
||||||
|
|
||||||
multi->err_multi = 0.0;
|
|
||||||
i_int_coeff[0] = 0.0;
|
|
||||||
j_int_coeff[0] = 0.0;
|
|
||||||
for (i=0;i<numlev;i++) {
|
|
||||||
i_int_coeff[i] = 1.0/(imx[i]-1);
|
|
||||||
j_int_coeff[i] = 1.0/(jmx[i]-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* initialize constants and variables
|
|
||||||
|
|
||||||
id is a global shared variable that has fetch-and-add operations
|
|
||||||
performed on it by processes to obtain their pids. */
|
|
||||||
|
|
||||||
global->id = 0;
|
|
||||||
global->psibi = 0.0;
|
|
||||||
pi = atan(1.0);
|
|
||||||
pi = 4.*pi;
|
|
||||||
|
|
||||||
factjacob = -1./(12.*res*res);
|
|
||||||
factlap = 1./(res*res);
|
|
||||||
eig2 = -h*f0*f0/(h1*h3*gpr);
|
|
||||||
|
|
||||||
jmm1 = jm-1 ;
|
|
||||||
ysca = ((double) jmm1)*res ;
|
|
||||||
|
|
||||||
im = (imx[numlev-1]-2)/yprocs + 2;
|
|
||||||
jm = (jmx[numlev-1]-2)/xprocs + 2;
|
|
||||||
|
|
||||||
if (do_output) {
|
|
||||||
printf(" MULTIGRID OUTPUTS\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
CREATE(slave, nprocs);
|
|
||||||
WAIT_FOR_END(nprocs);
|
|
||||||
CLOCK(computeend)
|
|
||||||
|
|
||||||
printf("\n");
|
|
||||||
printf(" PROCESS STATISTICS\n");
|
|
||||||
printf(" Total Multigrid Multigrid\n");
|
|
||||||
printf(" Proc Time Time Fraction\n");
|
|
||||||
printf(" 0 %15.0f %15.0f %10.3f\n", gp[0].total_time,gp[0].multi_time, gp[0].multi_time/gp[0].total_time);
|
|
||||||
|
|
||||||
if (do_stats) {
|
|
||||||
min_total = max_total = avg_total = gp[0].total_time;
|
|
||||||
min_multi = max_multi = avg_multi = gp[0].multi_time;
|
|
||||||
min_frac = max_frac = avg_frac = gp[0].multi_time/gp[0].total_time;
|
|
||||||
for (i=1;i<nprocs;i++) {
|
|
||||||
if (gp[i].total_time > max_total) {
|
|
||||||
max_total = gp[i].total_time;
|
|
||||||
}
|
|
||||||
if (gp[i].total_time < min_total) {
|
|
||||||
min_total = gp[i].total_time;
|
|
||||||
}
|
|
||||||
if (gp[i].multi_time > max_multi) {
|
|
||||||
max_multi = gp[i].multi_time;
|
|
||||||
}
|
|
||||||
if (gp[i].multi_time < min_multi) {
|
|
||||||
min_multi = gp[i].multi_time;
|
|
||||||
}
|
|
||||||
if (gp[i].multi_time/gp[i].total_time > max_frac) {
|
|
||||||
max_frac = gp[i].multi_time/gp[i].total_time;
|
|
||||||
}
|
|
||||||
if (gp[i].multi_time/gp[i].total_time < min_frac) {
|
|
||||||
min_frac = gp[i].multi_time/gp[i].total_time;
|
|
||||||
}
|
|
||||||
avg_total += gp[i].total_time;
|
|
||||||
avg_multi += gp[i].multi_time;
|
|
||||||
avg_frac += gp[i].multi_time/gp[i].total_time;
|
|
||||||
}
|
|
||||||
avg_total = avg_total / nprocs;
|
|
||||||
avg_multi = avg_multi / nprocs;
|
|
||||||
avg_frac = avg_frac / nprocs;
|
|
||||||
for (i=1;i<nprocs;i++) {
|
|
||||||
printf(" %3ld %15.0f %15.0f %10.3f\n", i,gp[i].total_time,gp[i].multi_time, gp[i].multi_time/gp[i].total_time);
|
|
||||||
}
|
|
||||||
printf(" Avg %15.0f %15.0f %10.3f\n", avg_total,avg_multi,avg_frac);
|
|
||||||
printf(" Min %15.0f %15.0f %10.3f\n", min_total,min_multi,min_frac);
|
|
||||||
printf(" Max %15.0f %15.0f %10.3f\n", max_total,max_multi,max_frac);
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
|
|
||||||
global->starttime = start;
|
|
||||||
printf(" TIMING INFORMATION\n");
|
|
||||||
printf("Start time : %16lu\n", global->starttime);
|
|
||||||
printf("Initialization finish time : %16lu\n", global->trackstart);
|
|
||||||
printf("Overall finish time : %16lu\n", computeend);
|
|
||||||
printf("Total time with initialization : %16lu\n", computeend-global->starttime);
|
|
||||||
printf("Total time without initialization : %16lu\n", computeend-global->trackstart);
|
|
||||||
printf(" (excludes first timestep)\n");
|
|
||||||
printf("\n");
|
|
||||||
|
|
||||||
MAIN_END
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Return the base-2 logarithm of `number` when it is an exact power of
 * two, and -1 otherwise (this includes number <= 0).
 *
 * At most 50 doublings are attempted, so 2^50 is the largest power of
 * two that is recognised; anything larger yields -1.
 */
long log_2(long number)
{
  long cumulative = 1;   /* 2^out */
  long out = 0;

  /* Double until we reach or pass `number`, or hit the doubling cap. */
  while ((cumulative < number) && (out < 50)) {
    cumulative *= 2;
    out++;
  }

  return (cumulative == number) ? out : -1;
}
|
|
||||||
|
|
||||||
/* Write the message `s` to stderr as a single line prefixed with "ERROR: ". */
void printerr(char *s)
{
  (void) fprintf(stderr, "ERROR: %s\n", s);
}
|
|
||||||
|
|
|
@ -1,816 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/* Shared memory implementation of the multigrid method
|
|
||||||
Implementation uses red-black gauss-seidel relaxation
|
|
||||||
iterations, w cycles, and the method of half-injection for
|
|
||||||
residual computation. */
|
|
||||||
|
|
||||||
EXTERN_ENV
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include "decs.h"
|
|
||||||
|
|
||||||
/* perform multigrid (w cycles) */
|
|
||||||
void multig(long my_id)
|
|
||||||
{
|
|
||||||
long iter;
|
|
||||||
double wu;
|
|
||||||
double errp;
|
|
||||||
long m;
|
|
||||||
long flag1;
|
|
||||||
long flag2;
|
|
||||||
long k;
|
|
||||||
long my_num;
|
|
||||||
double wmax;
|
|
||||||
double local_err;
|
|
||||||
double red_local_err;
|
|
||||||
double black_local_err;
|
|
||||||
double g_error;
|
|
||||||
|
|
||||||
flag1 = 0;
|
|
||||||
flag2 = 0;
|
|
||||||
iter = 0;
|
|
||||||
m = numlev-1;
|
|
||||||
wmax = maxwork;
|
|
||||||
my_num = my_id;
|
|
||||||
wu = 0.0;
|
|
||||||
|
|
||||||
k = m;
|
|
||||||
g_error = 1.0e30;
|
|
||||||
while ((!flag1) && (!flag2)) {
|
|
||||||
errp = g_error;
|
|
||||||
iter++;
|
|
||||||
if (my_num == MASTER) {
|
|
||||||
multi->err_multi = 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* barrier to make sure all procs have finished intadd or rescal */
|
|
||||||
/* before proceeding with relaxation */
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARRIER(bars->error_barrier,nprocs)
|
|
||||||
#else
|
|
||||||
BARRIER(bars->barrier,nprocs)
|
|
||||||
#endif
|
|
||||||
copy_black(k,my_num);
|
|
||||||
|
|
||||||
relax(k,&red_local_err,RED_ITER,my_num);
|
|
||||||
|
|
||||||
/* barrier to make sure all red computations have been performed */
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARRIER(bars->error_barrier,nprocs)
|
|
||||||
#else
|
|
||||||
BARRIER(bars->barrier,nprocs)
|
|
||||||
#endif
|
|
||||||
copy_red(k,my_num);
|
|
||||||
|
|
||||||
relax(k,&black_local_err,BLACK_ITER,my_num);
|
|
||||||
|
|
||||||
/* compute max local error from red_local_err and black_local_err */
|
|
||||||
|
|
||||||
if (red_local_err > black_local_err) {
|
|
||||||
local_err = red_local_err;
|
|
||||||
} else {
|
|
||||||
local_err = black_local_err;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* update the global error if necessary */
|
|
||||||
|
|
||||||
LOCK(locks->error_lock)
|
|
||||||
if (local_err > multi->err_multi) {
|
|
||||||
multi->err_multi = local_err;
|
|
||||||
}
|
|
||||||
UNLOCK(locks->error_lock)
|
|
||||||
|
|
||||||
/* a single relaxation sweep at the finest level is one unit of */
|
|
||||||
/* work */
|
|
||||||
|
|
||||||
wu+=pow((double)4.0,(double)k-m);
|
|
||||||
|
|
||||||
/* barrier to make sure all processors have checked local error */
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARRIER(bars->error_barrier,nprocs)
|
|
||||||
#else
|
|
||||||
BARRIER(bars->barrier,nprocs)
|
|
||||||
#endif
|
|
||||||
g_error = multi->err_multi;
|
|
||||||
|
|
||||||
/* barrier to make sure master does not cycle back to top of loop */
|
|
||||||
/* and reset global->err before we read it and decide what to do */
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARRIER(bars->error_barrier,nprocs)
|
|
||||||
#else
|
|
||||||
BARRIER(bars->barrier,nprocs)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (g_error >= lev_tol[k]) {
|
|
||||||
if (wu > wmax) {
|
|
||||||
/* max work exceeded */
|
|
||||||
flag1 = 1;
|
|
||||||
fprintf(stderr,"ERROR: Maximum work limit %0.5f exceeded\n",wmax);
|
|
||||||
exit(-1);
|
|
||||||
} else {
|
|
||||||
/* if we have not converged */
|
|
||||||
if ((k != 0) && (g_error/errp >= 0.6) &&
|
|
||||||
(k > minlevel)) {
|
|
||||||
/* if need to go to coarser grid */
|
|
||||||
|
|
||||||
copy_borders(k,my_num);
|
|
||||||
copy_rhs_borders(k,my_num);
|
|
||||||
|
|
||||||
/* This bar is needed because the routine rescal uses the neighbor's
|
|
||||||
border points to compute s4. We must ensure that the neighbor's
|
|
||||||
border points have been written before we try computing the new
|
|
||||||
rescal values */
|
|
||||||
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARRIER(bars->error_barrier,nprocs)
|
|
||||||
#else
|
|
||||||
BARRIER(bars->barrier,nprocs)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
rescal(k,my_num);
|
|
||||||
|
|
||||||
/* transfer residual to rhs of coarser grid */
|
|
||||||
lev_tol[k-1] = 0.3 * g_error;
|
|
||||||
k = k-1;
|
|
||||||
putz(k,my_num);
|
|
||||||
/* make initial guess on coarser grid zero */
|
|
||||||
g_error = 1.0e30;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* if we have converged at this level */
|
|
||||||
if (k == m) {
|
|
||||||
/* if finest grid, we are done */
|
|
||||||
flag2 = 1;
|
|
||||||
} else {
|
|
||||||
/* else go to next finest grid */
|
|
||||||
|
|
||||||
copy_borders(k,my_num);
|
|
||||||
|
|
||||||
intadd(k,my_num);
|
|
||||||
/* changes the grid values at the finer level. rhs at finer level */
|
|
||||||
/* remains what it already is */
|
|
||||||
k++;
|
|
||||||
g_error = 1.0e30;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (do_output) {
|
|
||||||
if (my_num == MASTER) {
|
|
||||||
printf("iter %ld, level %ld, residual norm %12.8e, work = %7.3f\n", iter,k,multi->err_multi,wu);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* perform red or black iteration (not both) */
|
|
||||||
void relax(long k, double *err, long color, long my_num)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long iend;
|
|
||||||
long jend;
|
|
||||||
long oddistart;
|
|
||||||
long oddjstart;
|
|
||||||
long evenistart;
|
|
||||||
long evenjstart;
|
|
||||||
double a;
|
|
||||||
double h;
|
|
||||||
double factor;
|
|
||||||
double maxerr;
|
|
||||||
double newerr;
|
|
||||||
double oldval;
|
|
||||||
double newval;
|
|
||||||
double **t2a;
|
|
||||||
double **t2b;
|
|
||||||
double *t1a;
|
|
||||||
double *t1b;
|
|
||||||
double *t1c;
|
|
||||||
double *t1d;
|
|
||||||
|
|
||||||
i = 0;
|
|
||||||
j = 0;
|
|
||||||
|
|
||||||
*err = 0.0;
|
|
||||||
h = lev_res[k];
|
|
||||||
|
|
||||||
/* points whose sum of row and col index is even do a red iteration, */
|
|
||||||
/* others do a black */
|
|
||||||
|
|
||||||
evenistart = gp[my_num].eist[k];
|
|
||||||
evenjstart = gp[my_num].ejst[k];
|
|
||||||
oddistart = gp[my_num].oist[k];
|
|
||||||
oddjstart = gp[my_num].ojst[k];
|
|
||||||
|
|
||||||
iend = gp[my_num].rlien[k];
|
|
||||||
jend = gp[my_num].rljen[k];
|
|
||||||
|
|
||||||
factor = 4.0 - eig2 * h * h ;
|
|
||||||
maxerr = 0.0;
|
|
||||||
t2a = (double **) q_multi[my_num][k];
|
|
||||||
t2b = (double **) rhs_multi[my_num][k];
|
|
||||||
if (color == RED_ITER) {
|
|
||||||
for (i=evenistart;i<iend;i+=2) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
t1c = (double *) t2a[i-1];
|
|
||||||
t1d = (double *) t2a[i+1];
|
|
||||||
for (j=evenjstart;j<jend;j+=2) {
|
|
||||||
a = t1a[j+1] + t1a[j-1] +
|
|
||||||
t1c[j] + t1d[j] -
|
|
||||||
t1b[j] ;
|
|
||||||
oldval = t1a[j];
|
|
||||||
newval = a / factor;
|
|
||||||
newerr = oldval - newval;
|
|
||||||
t1a[j] = newval;
|
|
||||||
if (fabs(newerr) > maxerr) {
|
|
||||||
maxerr = fabs(newerr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (i=oddistart;i<iend;i+=2) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
t1c = (double *) t2a[i-1];
|
|
||||||
t1d = (double *) t2a[i+1];
|
|
||||||
for (j=oddjstart;j<jend;j+=2) {
|
|
||||||
a = t1a[j+1] + t1a[j-1] +
|
|
||||||
t1c[j] + t1d[j] -
|
|
||||||
t1b[j] ;
|
|
||||||
oldval = t1a[j];
|
|
||||||
newval = a / factor;
|
|
||||||
newerr = oldval - newval;
|
|
||||||
t1a[j] = newval;
|
|
||||||
if (fabs(newerr) > maxerr) {
|
|
||||||
maxerr = fabs(newerr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (color == BLACK_ITER) {
|
|
||||||
for (i=evenistart;i<iend;i+=2) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
t1c = (double *) t2a[i-1];
|
|
||||||
t1d = (double *) t2a[i+1];
|
|
||||||
for (j=oddjstart;j<jend;j+=2) {
|
|
||||||
a = t1a[j+1] + t1a[j-1] +
|
|
||||||
t1c[j] + t1d[j] -
|
|
||||||
t1b[j] ;
|
|
||||||
oldval = t1a[j];
|
|
||||||
newval = a / factor;
|
|
||||||
newerr = oldval - newval;
|
|
||||||
t1a[j] = newval;
|
|
||||||
if (fabs(newerr) > maxerr) {
|
|
||||||
maxerr = fabs(newerr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (i=oddistart;i<iend;i+=2) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
t1c = (double *) t2a[i-1];
|
|
||||||
t1d = (double *) t2a[i+1];
|
|
||||||
for (j=evenjstart;j<jend;j+=2) {
|
|
||||||
a = t1a[j+1] + t1a[j-1] +
|
|
||||||
t1c[j] + t1d[j] -
|
|
||||||
t1b[j] ;
|
|
||||||
oldval = t1a[j];
|
|
||||||
newval = a / factor;
|
|
||||||
newerr = oldval - newval;
|
|
||||||
t1a[j] = newval;
|
|
||||||
if (fabs(newerr) > maxerr) {
|
|
||||||
maxerr = fabs(newerr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*err = maxerr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* perform half-injection to next coarsest level */
|
|
||||||
void rescal(long kf, long my_num)
|
|
||||||
{
|
|
||||||
long ic;
|
|
||||||
long if17;
|
|
||||||
long jf;
|
|
||||||
long jc;
|
|
||||||
long krc;
|
|
||||||
long istart;
|
|
||||||
long iend;
|
|
||||||
long jstart;
|
|
||||||
long jend;
|
|
||||||
double hf;
|
|
||||||
double hc;
|
|
||||||
double s;
|
|
||||||
double s1;
|
|
||||||
double s2;
|
|
||||||
double s3;
|
|
||||||
double s4;
|
|
||||||
double factor;
|
|
||||||
double int1;
|
|
||||||
double int2;
|
|
||||||
double i_int_factor;
|
|
||||||
double j_int_factor;
|
|
||||||
double int_val;
|
|
||||||
long i_off;
|
|
||||||
long j_off;
|
|
||||||
long up_proc;
|
|
||||||
long left_proc;
|
|
||||||
long im;
|
|
||||||
long jm;
|
|
||||||
double temp;
|
|
||||||
double temp2;
|
|
||||||
double **t2a;
|
|
||||||
double **t2b;
|
|
||||||
double **t2c;
|
|
||||||
double *t1a;
|
|
||||||
double *t1b;
|
|
||||||
double *t1c;
|
|
||||||
double *t1d;
|
|
||||||
double *t1e;
|
|
||||||
double *t1f;
|
|
||||||
double *t1g;
|
|
||||||
double *t1h;
|
|
||||||
|
|
||||||
krc = kf - 1;
|
|
||||||
hc = lev_res[krc];
|
|
||||||
hf = lev_res[kf];
|
|
||||||
i_off = gp[my_num].rownum*ypts_per_proc[krc];
|
|
||||||
j_off = gp[my_num].colnum*xpts_per_proc[krc];
|
|
||||||
up_proc = gp[my_num].neighbors[UP];
|
|
||||||
left_proc = gp[my_num].neighbors[LEFT];
|
|
||||||
im = (imx[kf]-2)/yprocs;
|
|
||||||
jm = (jmx[kf]-2)/xprocs;
|
|
||||||
|
|
||||||
istart = gp[my_num].rlist[krc];
|
|
||||||
jstart = gp[my_num].rljst[krc];
|
|
||||||
iend = gp[my_num].rlien[krc] - 1;
|
|
||||||
jend = gp[my_num].rljen[krc] - 1;
|
|
||||||
|
|
||||||
factor = 4.0 - eig2 * hf * hf;
|
|
||||||
|
|
||||||
t2a = (double **) q_multi[my_num][kf];
|
|
||||||
t2b = (double **) rhs_multi[my_num][kf];
|
|
||||||
t2c = (double **) rhs_multi[my_num][krc];
|
|
||||||
if17=2*(istart-1);
|
|
||||||
for(ic=istart;ic<=iend;ic++) {
|
|
||||||
if17+=2;
|
|
||||||
i_int_factor = (ic+i_off) * i_int_coeff[krc] * 0.5;
|
|
||||||
jf = 2 * (jstart - 1);
|
|
||||||
t1a = (double *) t2a[if17];
|
|
||||||
t1b = (double *) t2b[if17];
|
|
||||||
t1c = (double *) t2c[ic];
|
|
||||||
t1d = (double *) t2a[if17-1];
|
|
||||||
t1e = (double *) t2a[if17+1];
|
|
||||||
t1f = (double *) t2a[if17-2];
|
|
||||||
t1g = (double *) t2a[if17-3];
|
|
||||||
t1h = (double *) t2b[if17-2];
|
|
||||||
for(jc=jstart;jc<=jend;jc++) {
|
|
||||||
jf+=2;
|
|
||||||
j_int_factor = (jc+j_off)*j_int_coeff[krc] * 0.5;
|
|
||||||
|
|
||||||
/* method of half-injection uses 2.0 instead of 4.0 */
|
|
||||||
|
|
||||||
/* do bilinear interpolation */
|
|
||||||
s = t1a[jf+1] + t1a[jf-1] + t1d[jf] + t1e[jf];
|
|
||||||
s1 = 2.0 * (t1b[jf] - s + factor * t1a[jf]);
|
|
||||||
if (((if17 == 2) && (gp[my_num].neighbors[UP] == -1)) ||
|
|
||||||
((jf == 2) && (gp[my_num].neighbors[LEFT] == -1))) {
|
|
||||||
s2 = 0;
|
|
||||||
s3 = 0;
|
|
||||||
s4 = 0;
|
|
||||||
} else if ((if17 == 2) || (jf == 2)) {
|
|
||||||
if (jf == 2) {
|
|
||||||
temp = q_multi[left_proc][kf][if17][jm-1];
|
|
||||||
} else {
|
|
||||||
temp = t1a[jf-3];
|
|
||||||
}
|
|
||||||
s = t1a[jf-1] + temp + t1d[jf-2] + t1e[jf-2];
|
|
||||||
s2 = 2.0 * (t1b[jf-2] - s + factor * t1a[jf-2]);
|
|
||||||
if (if17 == 2) {
|
|
||||||
temp = q_multi[up_proc][kf][im-1][jf];
|
|
||||||
} else {
|
|
||||||
temp = t1g[jf];
|
|
||||||
}
|
|
||||||
s = t1f[jf+1]+ t1f[jf-1]+ temp + t1d[jf];
|
|
||||||
s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]);
|
|
||||||
if (jf == 2) {
|
|
||||||
temp = q_multi[left_proc][kf][if17-2][jm-1];
|
|
||||||
} else {
|
|
||||||
temp = t1f[jf-3];
|
|
||||||
}
|
|
||||||
if (if17 == 2) {
|
|
||||||
temp2 = q_multi[up_proc][kf][im-1][jf-2];
|
|
||||||
} else {
|
|
||||||
temp2 = t1g[jf-2];
|
|
||||||
}
|
|
||||||
s = t1f[jf-1]+ temp + temp2 + t1d[jf-2];
|
|
||||||
s4 = 2.0 * (t1h[jf-2] - s + factor * t1f[jf-2]);
|
|
||||||
} else {
|
|
||||||
s = t1a[jf-1] + t1a[jf-3] + t1d[jf-2] + t1e[jf-2];
|
|
||||||
s2 = 2.0 * (t1b[jf-2] - s + factor * t1a[jf-2]);
|
|
||||||
s = t1f[jf+1]+ t1f[jf-1]+ t1g[jf] + t1d[jf];
|
|
||||||
s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]);
|
|
||||||
s = t1f[jf-1]+ t1f[jf-3]+ t1g[jf-2]+ t1d[jf-2];
|
|
||||||
s4 = 2.0 * (t1h[jf-2] - s + factor * t1f[jf-2]);
|
|
||||||
}
|
|
||||||
int1 = j_int_factor*s4 + (1.0-j_int_factor)*s3;
|
|
||||||
int2 = j_int_factor*s2 + (1.0-j_int_factor)*s1;
|
|
||||||
int_val = i_int_factor*int1+(1.0-i_int_factor)*int2;
|
|
||||||
t1c[jc] = i_int_factor*int1+(1.0-i_int_factor)*int2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* perform interpolation and addition to next finest grid */
|
|
||||||
void intadd(long kc, long my_num)
|
|
||||||
{
|
|
||||||
long ic;
|
|
||||||
long if17;
|
|
||||||
long jf;
|
|
||||||
long jc;
|
|
||||||
long kf;
|
|
||||||
long istart;
|
|
||||||
long jstart;
|
|
||||||
long iend;
|
|
||||||
long jend;
|
|
||||||
double hc;
|
|
||||||
double hf;
|
|
||||||
double int1;
|
|
||||||
double int2;
|
|
||||||
double i_int_factor1;
|
|
||||||
double j_int_factor1;
|
|
||||||
double i_int_factor2;
|
|
||||||
double j_int_factor2;
|
|
||||||
long i_off;
|
|
||||||
long j_off;
|
|
||||||
double **t2a;
|
|
||||||
double **t2b;
|
|
||||||
double *t1a;
|
|
||||||
double *t1b;
|
|
||||||
double *t1c;
|
|
||||||
double *t1d;
|
|
||||||
double *t1e;
|
|
||||||
|
|
||||||
kf = kc + 1;
|
|
||||||
hc = lev_res[kc];
|
|
||||||
hf = lev_res[kf];
|
|
||||||
|
|
||||||
istart = gp[my_num].rlist[kc];
|
|
||||||
jstart = gp[my_num].rljst[kc];
|
|
||||||
iend = gp[my_num].rlien[kc] - 1;
|
|
||||||
jend = gp[my_num].rljen[kc] - 1;
|
|
||||||
i_off = gp[my_num].rownum*ypts_per_proc[kc];
|
|
||||||
j_off = gp[my_num].colnum*xpts_per_proc[kc];
|
|
||||||
|
|
||||||
t2a = (double **) q_multi[my_num][kc];
|
|
||||||
t2b = (double **) q_multi[my_num][kf];
|
|
||||||
if17 = 2*(istart-1);
|
|
||||||
for(ic=istart;ic<=iend;ic++) {
|
|
||||||
if17+=2;
|
|
||||||
i_int_factor1= ((imx[kc]-2)-(ic+i_off-1)) * (i_int_coeff[kf]);
|
|
||||||
i_int_factor2= (ic+i_off) * i_int_coeff[kf];
|
|
||||||
jf = 2*(jstart-1);
|
|
||||||
|
|
||||||
t1a = (double *) t2a[ic];
|
|
||||||
t1b = (double *) t2a[ic-1];
|
|
||||||
t1c = (double *) t2a[ic+1];
|
|
||||||
t1d = (double *) t2b[if17];
|
|
||||||
t1e = (double *) t2b[if17-1];
|
|
||||||
for(jc=jstart;jc<=jend;jc++) {
|
|
||||||
jf+=2;
|
|
||||||
j_int_factor1= ((jmx[kc]-2)-(jc+j_off-1)) * (j_int_coeff[kf]);
|
|
||||||
j_int_factor2= (jc+j_off) * j_int_coeff[kf];
|
|
||||||
|
|
||||||
int1 = j_int_factor1*t1a[jc-1] + (1.0-j_int_factor1)*t1a[jc];
|
|
||||||
int2 = j_int_factor1*t1b[jc-1] + (1.0-j_int_factor1)*t1b[jc];
|
|
||||||
t1e[jf-1] += i_int_factor1*int2 + (1.0-i_int_factor1)*int1;
|
|
||||||
int2 = j_int_factor1*t1c[jc-1] + (1.0-j_int_factor1)*t1c[jc];
|
|
||||||
t1d[jf-1] += i_int_factor2*int2 + (1.0-i_int_factor2)*int1;
|
|
||||||
int1 = j_int_factor2*t1a[jc+1] + (1.0-j_int_factor2)*t1a[jc];
|
|
||||||
int2 = j_int_factor2*t1b[jc+1] + (1.0-j_int_factor2)*t1b[jc];
|
|
||||||
t1e[jf] += i_int_factor1*int2 + (1.0-i_int_factor1)*int1;
|
|
||||||
int2 = j_int_factor2*t1c[jc+1] + (1.0-j_int_factor2)*t1c[jc];
|
|
||||||
t1d[jf] += i_int_factor2*int2 + (1.0-i_int_factor2)*int1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* initialize a grid to zero in parallel */
|
|
||||||
void putz(long k, long my_num)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long istart;
|
|
||||||
long jstart;
|
|
||||||
long iend;
|
|
||||||
long jend;
|
|
||||||
double **t2a;
|
|
||||||
double *t1a;
|
|
||||||
|
|
||||||
istart = gp[my_num].rlist[k];
|
|
||||||
jstart = gp[my_num].rljst[k];
|
|
||||||
iend = gp[my_num].rlien[k];
|
|
||||||
jend = gp[my_num].rljen[k];
|
|
||||||
|
|
||||||
t2a = (double **) q_multi[my_num][k];
|
|
||||||
for (i=istart;i<=iend;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
for (j=jstart;j<=jend;j++) {
|
|
||||||
t1a[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void copy_borders(long k, long pid)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long jj;
|
|
||||||
long im;
|
|
||||||
long jm;
|
|
||||||
long lastrow;
|
|
||||||
long lastcol;
|
|
||||||
double **t2a;
|
|
||||||
double **t2b;
|
|
||||||
double *t1a;
|
|
||||||
double *t1b;
|
|
||||||
|
|
||||||
im = (imx[k]-2)/yprocs + 2;
|
|
||||||
jm = (jmx[k]-2)/xprocs + 2;
|
|
||||||
lastrow = (imx[k]-2)/yprocs;
|
|
||||||
lastcol = (jmx[k]-2)/xprocs;
|
|
||||||
|
|
||||||
t2a = (double **) q_multi[pid][k];
|
|
||||||
jj = gp[pid].neighbors[UPLEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0]=q_multi[jj][k][im-2][jm-2];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[UPRIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1]=q_multi[jj][k][im-2][1];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWNLEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0]=q_multi[jj][k][1][jm-2];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWNRIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1]=q_multi[jj][k][1][1];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (gp[pid].neighbors[UP] == -1) {
|
|
||||||
jj = gp[pid].neighbors[LEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = q_multi[jj][k][0][jm-2];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = q_multi[jj][k][1][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[RIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = q_multi[jj][k][0][1];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = q_multi[jj][k][1][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[DOWN] == -1) {
|
|
||||||
jj = gp[pid].neighbors[LEFT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = q_multi[jj][k][im-1][jm-2];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = q_multi[jj][k][im-2][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[RIGHT];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = q_multi[jj][k][im-1][1];
|
|
||||||
} else {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = q_multi[jj][k][im-2][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[LEFT] == -1) {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][0] = q_multi[jj][k][im-2][0];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][0] = q_multi[jj][k][1][0];
|
|
||||||
}
|
|
||||||
} else if (gp[pid].neighbors[RIGHT] == -1) {
|
|
||||||
jj = gp[pid].neighbors[UP];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[0][jm-1] = q_multi[jj][k][im-2][jm-1];
|
|
||||||
}
|
|
||||||
jj = gp[pid].neighbors[DOWN];
|
|
||||||
if (jj != -1) {
|
|
||||||
t2a[im-1][jm-1] = q_multi[jj][k][1][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
j = gp[pid].neighbors[UP];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) q_multi[j][k][im-2];
|
|
||||||
for (i=1;i<=lastcol;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[DOWN];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
t1b = (double *) q_multi[j][k][1];
|
|
||||||
for (i=1;i<=lastcol;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[LEFT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) q_multi[j][k];
|
|
||||||
for (i=1;i<=lastrow;i++) {
|
|
||||||
t2a[i][0] = t2b[i][jm-2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[pid].neighbors[RIGHT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) q_multi[j][k];
|
|
||||||
for (i=1;i<=lastrow;i++) {
|
|
||||||
t2a[i][jm-1] = t2b[i][1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
void copy_rhs_borders(long k, long procid)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long im;
|
|
||||||
long jm;
|
|
||||||
long lastrow;
|
|
||||||
long lastcol;
|
|
||||||
double **t2a;
|
|
||||||
double **t2b;
|
|
||||||
double *t1a;
|
|
||||||
double *t1b;
|
|
||||||
|
|
||||||
im = (imx[k]-2)/yprocs+2;
|
|
||||||
jm = (jmx[k]-2)/xprocs+2;
|
|
||||||
lastrow = (imx[k]-2)/yprocs;
|
|
||||||
lastcol = (jmx[k]-2)/xprocs;
|
|
||||||
|
|
||||||
t2a = (double **) rhs_multi[procid][k];
|
|
||||||
if (gp[procid].neighbors[UPLEFT] != -1) {
|
|
||||||
j = gp[procid].neighbors[UPLEFT];
|
|
||||||
t2a[0][0] = rhs_multi[j][k][im-2][jm-2];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (gp[procid].neighbors[UP] != -1) {
|
|
||||||
j = gp[procid].neighbors[UP];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) rhs_multi[j][k][im-2];
|
|
||||||
for (i=2;i<=lastcol;i+=2) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] != -1) {
|
|
||||||
j = gp[procid].neighbors[LEFT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) rhs_multi[j][k];
|
|
||||||
for (i=2;i<=lastrow;i+=2) {
|
|
||||||
t2a[i][0] = t2b[i][jm-2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void copy_red(long k, long procid)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long im;
|
|
||||||
long jm;
|
|
||||||
long lastrow;
|
|
||||||
long lastcol;
|
|
||||||
double **t2a;
|
|
||||||
double **t2b;
|
|
||||||
double *t1a;
|
|
||||||
double *t1b;
|
|
||||||
|
|
||||||
im = (imx[k]-2)/yprocs+2;
|
|
||||||
jm = (jmx[k]-2)/xprocs+2;
|
|
||||||
lastrow = (imx[k]-2)/yprocs;
|
|
||||||
lastcol = (jmx[k]-2)/xprocs;
|
|
||||||
|
|
||||||
t2a = (double **) q_multi[procid][k];
|
|
||||||
j = gp[procid].neighbors[UP];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) q_multi[j][k][im-2];
|
|
||||||
for (i=2;i<=lastcol;i+=2) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[procid].neighbors[DOWN];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
t1b = (double *) q_multi[j][k][1];
|
|
||||||
for (i=1;i<=lastcol;i+=2) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[procid].neighbors[LEFT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) q_multi[j][k];
|
|
||||||
for (i=2;i<=lastrow;i+=2) {
|
|
||||||
t2a[i][0] = t2b[i][jm-2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[procid].neighbors[RIGHT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) q_multi[j][k];
|
|
||||||
for (i=1;i<=lastrow;i+=2) {
|
|
||||||
t2a[i][jm-1] = t2b[i][1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void copy_black(long k, long procid)
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long im;
|
|
||||||
long jm;
|
|
||||||
long lastrow;
|
|
||||||
long lastcol;
|
|
||||||
double **t2a;
|
|
||||||
double **t2b;
|
|
||||||
double *t1a;
|
|
||||||
double *t1b;
|
|
||||||
|
|
||||||
im = (imx[k]-2)/yprocs+2;
|
|
||||||
jm = (jmx[k]-2)/xprocs+2;
|
|
||||||
lastrow = (imx[k]-2)/yprocs;
|
|
||||||
lastcol = (jmx[k]-2)/xprocs;
|
|
||||||
|
|
||||||
t2a = (double **) q_multi[procid][k];
|
|
||||||
j = gp[procid].neighbors[UP];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) q_multi[j][k][im-2];
|
|
||||||
for (i=1;i<=lastcol;i+=2) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[procid].neighbors[DOWN];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
t1b = (double *) q_multi[j][k][1];
|
|
||||||
for (i=2;i<=lastcol;i+=2) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[procid].neighbors[LEFT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) q_multi[j][k];
|
|
||||||
for (i=1;i<=lastrow;i+=2) {
|
|
||||||
t2a[i][0] = t2b[i][jm-2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[procid].neighbors[RIGHT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) q_multi[j][k];
|
|
||||||
for (i=2;i<=lastrow;i+=2) {
|
|
||||||
t2a[i][jm-1] = t2b[i][1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,830 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/* ****************
|
|
||||||
subroutine slave
|
|
||||||
**************** */
|
|
||||||
|
|
||||||
EXTERN_ENV
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include "decs.h"
|
|
||||||
|
|
||||||
void slave()
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long nstep;
|
|
||||||
long iindex;
|
|
||||||
long iday;
|
|
||||||
double ysca1;
|
|
||||||
double y;
|
|
||||||
double factor;
|
|
||||||
double sintemp;
|
|
||||||
double curlt;
|
|
||||||
double ressqr;
|
|
||||||
long istart;
|
|
||||||
long iend;
|
|
||||||
long jstart;
|
|
||||||
long jend;
|
|
||||||
long ist;
|
|
||||||
long ien;
|
|
||||||
long jst;
|
|
||||||
long jen;
|
|
||||||
double fac;
|
|
||||||
long dayflag=0;
|
|
||||||
long dhourflag=0;
|
|
||||||
long endflag=0;
|
|
||||||
long firstrow;
|
|
||||||
long lastrow;
|
|
||||||
long numrows;
|
|
||||||
long firstcol;
|
|
||||||
long lastcol;
|
|
||||||
long numcols;
|
|
||||||
long psiindex;
|
|
||||||
double psibipriv;
|
|
||||||
double ttime;
|
|
||||||
double dhour;
|
|
||||||
double day;
|
|
||||||
long procid;
|
|
||||||
long psinum;
|
|
||||||
long j_off = 0;
|
|
||||||
unsigned long t1;
|
|
||||||
double **t2a;
|
|
||||||
double **t2b;
|
|
||||||
double *t1a;
|
|
||||||
double *t1b;
|
|
||||||
double *t1c;
|
|
||||||
double *t1d;
|
|
||||||
|
|
||||||
ressqr = lev_res[numlev-1] * lev_res[numlev-1];
|
|
||||||
|
|
||||||
LOCK(locks->idlock)
|
|
||||||
procid = global->id;
|
|
||||||
global->id = global->id+1;
|
|
||||||
UNLOCK(locks->idlock)
|
|
||||||
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARRIER(bars->sl_prini,nprocs)
|
|
||||||
#else
|
|
||||||
BARRIER(bars->barrier,nprocs)
|
|
||||||
#endif
|
|
||||||
/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
|
|
||||||
processors to avoid migration. */
|
|
||||||
|
|
||||||
/* POSSIBLE ENHANCEMENT: Here is where one might distribute
|
|
||||||
data structures across physically distributed memories as
|
|
||||||
desired.
|
|
||||||
|
|
||||||
One way to do this is as follows. The function allocate(START,SIZE,I)
|
|
||||||
is assumed to place all addresses x such that
|
|
||||||
(START <= x < START+SIZE) on node I.
|
|
||||||
|
|
||||||
long d_size;
|
|
||||||
unsigned long g_size;
|
|
||||||
unsigned long mg_size;
|
|
||||||
|
|
||||||
if (procid == MASTER) {
|
|
||||||
g_size = ((jmx[numlev-1]-2)/xprocs+2)*((imx[numlev-1]-2)/yprocs+2)*siz
|
|
||||||
eof(double) +
|
|
||||||
((imx[numlev-1]-2)/yprocs+2)*sizeof(double *);
|
|
||||||
|
|
||||||
mg_size = numlev*sizeof(double **);
|
|
||||||
for (i=0;i<numlev;i++) {
|
|
||||||
mg_size+=((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+
|
|
||||||
((imx[i]-2)/yprocs+2)*sizeof(double *);
|
|
||||||
}
|
|
||||||
for (i= 0;i<nprocs;i++) {
|
|
||||||
d_size = 2*sizeof(double **);
|
|
||||||
allocate((unsigned long) psi[i],d_size,i);
|
|
||||||
allocate((unsigned long) psim[i],d_size,i);
|
|
||||||
allocate((unsigned long) work1[i],d_size,i);
|
|
||||||
allocate((unsigned long) work4[i],d_size,i);
|
|
||||||
allocate((unsigned long) work5[i],d_size,i);
|
|
||||||
allocate((unsigned long) work7[i],d_size,i);
|
|
||||||
allocate((unsigned long) temparray[i],d_size,i);
|
|
||||||
allocate((unsigned long) psi[i][0],g_size,i);
|
|
||||||
allocate((unsigned long) psi[i][1],g_size,i);
|
|
||||||
allocate((unsigned long) psim[i][0],g_size,i);
|
|
||||||
allocate((unsigned long) psim[i][1],g_size,i);
|
|
||||||
allocate((unsigned long) psium[i],g_size,i);
|
|
||||||
allocate((unsigned long) psilm[i],g_size,i);
|
|
||||||
allocate((unsigned long) psib[i],g_size,i);
|
|
||||||
allocate((unsigned long) ga[i],g_size,i);
|
|
||||||
allocate((unsigned long) gb[i],g_size,i);
|
|
||||||
allocate((unsigned long) work1[i][0],g_size,i);
|
|
||||||
allocate((unsigned long) work1[i][1],g_size,i);
|
|
||||||
allocate((unsigned long) work2[i],g_size,i);
|
|
||||||
allocate((unsigned long) work3[i],g_size,i);
|
|
||||||
allocate((unsigned long) work4[i][0],g_size,i);
|
|
||||||
allocate((unsigned long) work4[i][1],g_size,i);
|
|
||||||
allocate((unsigned long) work5[i][0],g_size,i);
|
|
||||||
allocate((unsigned long) work5[i][1],g_size,i);
|
|
||||||
allocate((unsigned long) work6[i],g_size,i);
|
|
||||||
allocate((unsigned long) work7[i][0],g_size,i);
|
|
||||||
allocate((unsigned long) work7[i][1],g_size,i);
|
|
||||||
allocate((unsigned long) temparray[i][0],g_size,i);
|
|
||||||
allocate((unsigned long) temparray[i][1],g_size,i);
|
|
||||||
allocate((unsigned long) tauz[i],g_size,i);
|
|
||||||
allocate((unsigned long) oldga[i],g_size,i);
|
|
||||||
allocate((unsigned long) oldgb[i],g_size,i);
|
|
||||||
d_size = numlev * sizeof(long);
|
|
||||||
allocate((unsigned long) gp[i].rel_num_x,d_size,i);
|
|
||||||
allocate((unsigned long) gp[i].rel_num_y,d_size,i);
|
|
||||||
allocate((unsigned long) gp[i].eist,d_size,i);
|
|
||||||
allocate((unsigned long) gp[i].ejst,d_size,i);
|
|
||||||
allocate((unsigned long) gp[i].oist,d_size,i);
|
|
||||||
allocate((unsigned long) gp[i].ojst,d_size,i);
|
|
||||||
allocate((unsigned long) gp[i].rlist,d_size,i);
|
|
||||||
allocate((unsigned long) gp[i].rljst,d_size,i);
|
|
||||||
allocate((unsigned long) gp[i].rlien,d_size,i);
|
|
||||||
allocate((unsigned long) gp[i].rljen,d_size,i);
|
|
||||||
|
|
||||||
allocate((unsigned long) q_multi[i],mg_size,i);
|
|
||||||
allocate((unsigned long) rhs_multi[i],mg_size,i);
|
|
||||||
allocate((unsigned long) &(gp[i]),sizeof(struct Global_Private),i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
t2a = (double **) oldga[procid];
|
|
||||||
t2b = (double **) oldgb[procid];
|
|
||||||
for (i=0;i<im;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
for (j=0;j<jm;j++) {
|
|
||||||
t1a[j] = 0.0;
|
|
||||||
t1b[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
firstcol = 1;
|
|
||||||
lastcol = firstcol + gp[procid].rel_num_x[numlev-1] - 1;
|
|
||||||
firstrow = 1;
|
|
||||||
lastrow = firstrow + gp[procid].rel_num_y[numlev-1] - 1;
|
|
||||||
numcols = gp[procid].rel_num_x[numlev-1];
|
|
||||||
numrows = gp[procid].rel_num_y[numlev-1];
|
|
||||||
j_off = gp[procid].colnum*numcols;
|
|
||||||
|
|
||||||
if (procid > nprocs/2) {
|
|
||||||
psinum = 2;
|
|
||||||
} else {
|
|
||||||
psinum = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* every process gets its own copy of the timing variables to avoid
|
|
||||||
contention at shared memory locations. here, these variables
|
|
||||||
are initialized. */
|
|
||||||
|
|
||||||
ttime = 0.0;
|
|
||||||
dhour = 0.0;
|
|
||||||
nstep = 0 ;
|
|
||||||
day = 0.0;
|
|
||||||
|
|
||||||
ysca1 = 0.5*ysca;
|
|
||||||
if (procid == MASTER) {
|
|
||||||
t1a = (double *) f;
|
|
||||||
for (iindex = 0;iindex<=jmx[numlev-1]-1;iindex++) {
|
|
||||||
y = ((double) iindex)*res;
|
|
||||||
t1a[iindex] = f0+beta*(y-ysca1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = (double **) psium[procid];
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[0][0]=0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[im-1][0]=0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[0][jm-1]=0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[im-1][jm-1]=0.0;
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[UP] == -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[DOWN] == -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][0] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for(i=firstrow;i<=lastrow;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
t1a[iindex] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
t2a = (double **) psilm[procid];
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[0][0]=0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[im-1][0]=0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[0][jm-1]=0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[im-1][jm-1]=0.0;
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[UP] == -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[DOWN] == -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][0] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(i=firstrow;i<=lastrow;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
t1a[iindex] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = (double **) psib[procid];
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[0][0]=1.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[0][jm-1]=1.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[im-1][0]=1.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[im-1][jm-1]=1.0;
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[UP] == -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = 1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[DOWN] == -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = 1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][0] = 1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][jm-1] = 1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(i=firstrow;i<=lastrow;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
t1a[iindex] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* wait until all processes have completed the above initialization */
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARRIER(bars->sl_prini,nprocs)
|
|
||||||
#else
|
|
||||||
BARRIER(bars->barrier,nprocs)
|
|
||||||
#endif
|
|
||||||
/* compute psib array (one-time computation) and integrate into psibi */
|
|
||||||
|
|
||||||
istart = 1;
|
|
||||||
iend = istart + gp[procid].rel_num_y[numlev-1] - 1;
|
|
||||||
jstart = 1;
|
|
||||||
jend = jstart + gp[procid].rel_num_x[numlev-1] - 1;
|
|
||||||
ist = istart;
|
|
||||||
ien = iend;
|
|
||||||
jst = jstart;
|
|
||||||
jen = jend;
|
|
||||||
|
|
||||||
if (gp[procid].neighbors[UP] == -1) {
|
|
||||||
istart = 0;
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] == -1) {
|
|
||||||
jstart = 0;
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[DOWN] == -1) {
|
|
||||||
iend = im-1;
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
||||||
jend = jm-1;
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = (double **) rhs_multi[procid][numlev-1];
|
|
||||||
t2b = (double **) psib[procid];
|
|
||||||
for(i=istart;i<=iend;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
for(j=jstart;j<=jend;j++) {
|
|
||||||
t1a[j] = t1b[j] * ressqr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
t2a = (double **) q_multi[procid][numlev-1];
|
|
||||||
if (gp[procid].neighbors[UP] == -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) t2b[0];
|
|
||||||
for(j=jstart;j<=jend;j++) {
|
|
||||||
t1a[j] = t1b[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[DOWN] == -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
t1b = (double *) t2b[im-1];
|
|
||||||
for(j=jstart;j<=jend;j++) {
|
|
||||||
t1a[j] = t1b[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] == -1) {
|
|
||||||
for(i=istart;i<=iend;i++) {
|
|
||||||
t2a[i][0] = t2b[i][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
||||||
for(i=istart;i<=iend;i++) {
|
|
||||||
t2a[i][jm-1] = t2b[i][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARRIER(bars->sl_psini,nprocs)
|
|
||||||
#else
|
|
||||||
BARRIER(bars->barrier,nprocs)
|
|
||||||
#endif
|
|
||||||
t2a = (double **) psib[procid];
|
|
||||||
j = gp[procid].neighbors[UP];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) psib[j][im-2];
|
|
||||||
for (i=1;i<jm-1;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[procid].neighbors[DOWN];
|
|
||||||
if (j != -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
t1b = (double *) psib[j][1];
|
|
||||||
for (i=1;i<jm-1;i++) {
|
|
||||||
t1a[i] = t1b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[procid].neighbors[LEFT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) psib[j];
|
|
||||||
for (i=1;i<im-1;i++) {
|
|
||||||
t2a[i][0] = t2b[i][jm-2];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[procid].neighbors[RIGHT];
|
|
||||||
if (j != -1) {
|
|
||||||
t2b = (double **) psib[j];
|
|
||||||
for (i=1;i<im-1;i++) {
|
|
||||||
t2a[i][jm-1] = t2b[i][1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t2a = (double **) q_multi[procid][numlev-1];
|
|
||||||
t2b = (double **) psib[procid];
|
|
||||||
fac = 1.0 / (4.0 - ressqr*eig2);
|
|
||||||
for(i=ist;i<=ien;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
t1c = (double *) t2b[i-1];
|
|
||||||
t1d = (double *) t2b[i+1];
|
|
||||||
for(j=jst;j<=jen;j++) {
|
|
||||||
t1a[j] = fac * (t1d[j]+t1c[j]+t1b[j+1]+t1b[j-1] -
|
|
||||||
ressqr*t1b[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
multig(procid);
|
|
||||||
|
|
||||||
for(i=istart;i<=iend;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
for(j=jstart;j<=jend;j++) {
|
|
||||||
t1b[j] = t1a[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARRIER(bars->sl_prini,nprocs)
|
|
||||||
#else
|
|
||||||
BARRIER(bars->barrier,nprocs)
|
|
||||||
#endif
|
|
||||||
/* update the local running sum psibipriv by summing all the resulting
|
|
||||||
values in that process's share of the psib matrix */
|
|
||||||
|
|
||||||
t2a = (double **) psib[procid];
|
|
||||||
psibipriv=0.0;
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
psibipriv = psibipriv + 0.25*(t2a[0][0]);
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
psibipriv = psibipriv + 0.25*(t2a[0][jm-1]);
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
psibipriv=psibipriv+0.25*(t2a[im-1][0]);
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
psibipriv=psibipriv+0.25*(t2a[im-1][jm-1]);
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[UP] == -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
psibipriv = psibipriv + 0.5*t1a[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[DOWN] == -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
psibipriv = psibipriv + 0.5*t1a[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
psibipriv = psibipriv + 0.5*t2a[j][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
psibipriv = psibipriv + 0.5*t2a[j][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(i=firstrow;i<=lastrow;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
psibipriv = psibipriv + t1a[iindex];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* update the shared variable psibi by summing all the psibiprivs
|
|
||||||
of the individual processes into it. note that this combined
|
|
||||||
private and shared sum method avoids accessing the shared
|
|
||||||
variable psibi once for every element of the matrix. */
|
|
||||||
|
|
||||||
LOCK(locks->psibilock)
|
|
||||||
global->psibi = global->psibi + psibipriv;
|
|
||||||
UNLOCK(locks->psibilock)
|
|
||||||
|
|
||||||
/* initialize psim matrices
|
|
||||||
|
|
||||||
if there is more than one process, then split the processes
|
|
||||||
between the two psim matrices; otherwise, let the single process
|
|
||||||
work on one first and then the other */
|
|
||||||
|
|
||||||
for(psiindex=0;psiindex<=1;psiindex++) {
|
|
||||||
t2a = (double **) psim[procid][psiindex];
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[0][0] = 0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[im-1][0] = 0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[0][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[im-1][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[UP] == -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[DOWN] == -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][0] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(i=firstrow;i<=lastrow;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
t1a[iindex] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* initialize psi matrices the same way */
|
|
||||||
|
|
||||||
for(psiindex=0;psiindex<=1;psiindex++) {
|
|
||||||
t2a = (double **) psi[procid][psiindex];
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[0][0] = 0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[0][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[im-1][0] = 0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[im-1][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[UP] == -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[DOWN] == -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][0] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(i=firstrow;i<=lastrow;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
t1a[iindex] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* compute input curl of wind stress */
|
|
||||||
|
|
||||||
t2a = (double **) tauz[procid];
|
|
||||||
ysca1 = .5*ysca;
|
|
||||||
factor= -t0*pi/ysca1;
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[0][0] = 0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[im-1][0] = 0.0;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
sintemp = pi*((double) jm-1+j_off)*res/ysca1;
|
|
||||||
sintemp = sin(sintemp);
|
|
||||||
t2a[0][jm-1] = factor*sintemp;
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
sintemp = pi*((double) jm-1+j_off)*res/ysca1;
|
|
||||||
sintemp = sin(sintemp);
|
|
||||||
t2a[im-1][jm-1] = factor*sintemp;
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[UP] == -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
sintemp = pi*((double) j+j_off)*res/ysca1;
|
|
||||||
sintemp = sin(sintemp);
|
|
||||||
curlt = factor*sintemp;
|
|
||||||
t1a[j] = curlt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[DOWN] == -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
sintemp = pi*((double) j+j_off)*res/ysca1;
|
|
||||||
sintemp = sin(sintemp);
|
|
||||||
curlt = factor*sintemp;
|
|
||||||
t1a[j] = curlt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][0] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
||||||
sintemp = pi*((double) jm-1+j_off)*res/ysca1;
|
|
||||||
sintemp = sin(sintemp);
|
|
||||||
curlt = factor*sintemp;
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][jm-1] = curlt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(i=firstrow;i<=lastrow;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
sintemp = pi*((double) iindex+j_off)*res/ysca1;
|
|
||||||
sintemp = sin(sintemp);
|
|
||||||
curlt = factor*sintemp;
|
|
||||||
t1a[iindex] = curlt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARRIER(bars->sl_onetime,nprocs)
|
|
||||||
#else
|
|
||||||
BARRIER(bars->barrier,nprocs)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/***************************************************************
|
|
||||||
one-time stuff over at this point
|
|
||||||
***************************************************************/
|
|
||||||
|
|
||||||
while (!endflag) {
|
|
||||||
while ((!dayflag) || (!dhourflag)) {
|
|
||||||
dayflag = 0;
|
|
||||||
dhourflag = 0;
|
|
||||||
if (nstep == 1) {
|
|
||||||
if (procid == MASTER) {
|
|
||||||
CLOCK(global->trackstart)
|
|
||||||
}
|
|
||||||
if ((procid == MASTER) || (do_stats)) {
|
|
||||||
CLOCK(t1);
|
|
||||||
gp[procid].total_time = t1;
|
|
||||||
gp[procid].multi_time = 0;
|
|
||||||
}
|
|
||||||
/* POSSIBLE ENHANCEMENT: Here is where one might reset the
|
|
||||||
statistics that one is measuring about the parallel execution */
|
|
||||||
}
|
|
||||||
|
|
||||||
slave2(procid,firstrow,lastrow,numrows,firstcol,lastcol,numcols);
|
|
||||||
|
|
||||||
/* update time and step number
|
|
||||||
note that these time and step variables are private i.e. every
|
|
||||||
process has its own copy and keeps track of its own time */
|
|
||||||
|
|
||||||
ttime = ttime + dtau;
|
|
||||||
nstep = nstep + 1;
|
|
||||||
day = ttime/86400.0;
|
|
||||||
|
|
||||||
if (day > ((double) outday0)) {
|
|
||||||
dayflag = 1;
|
|
||||||
iday = (long) day;
|
|
||||||
dhour = dhour+dtau;
|
|
||||||
if (dhour >= 86400.0) {
|
|
||||||
dhourflag = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
dhour = 0.0;
|
|
||||||
|
|
||||||
t2a = (double **) psium[procid];
|
|
||||||
t2b = (double **) psim[procid][0];
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[0][0] = t2a[0][0]+t2b[0][0];
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[im-1][0] = t2a[im-1][0]+t2b[im-1][0];
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[0][jm-1] = t2a[0][jm-1]+t2b[0][jm-1];
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[im-1][jm-1] = t2a[im-1][jm-1] +
|
|
||||||
t2b[im-1][jm-1];
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[UP] == -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) t2b[0];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = t1a[j]+t1b[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[DOWN] == -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
t1b = (double *) t2b[im-1];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = t1a[j] + t1b[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][0] = t2a[j][0]+t2b[j][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][jm-1] = t2a[j][jm-1] +
|
|
||||||
t2b[j][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(i=firstrow;i<=lastrow;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
t1a[iindex] = t1a[iindex] + t1b[iindex];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* update values of psilm array to psilm + psim[2] */
|
|
||||||
|
|
||||||
t2a = (double **) psilm[procid];
|
|
||||||
t2b = (double **) psim[procid][1];
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[0][0] = t2a[0][0]+t2b[0][0];
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
|
|
||||||
t2a[im-1][0] = t2a[im-1][0]+t2b[im-1][0];
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[0][jm-1] = t2a[0][jm-1]+t2b[0][jm-1];
|
|
||||||
}
|
|
||||||
if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
|
|
||||||
t2a[im-1][jm-1] = t2a[im-1][jm-1] +
|
|
||||||
t2b[im-1][jm-1];
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[UP] == -1) {
|
|
||||||
t1a = (double *) t2a[0];
|
|
||||||
t1b = (double *) t2b[0];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = t1a[j]+t1b[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[DOWN] == -1) {
|
|
||||||
t1a = (double *) t2a[im-1];
|
|
||||||
t1b = (double *) t2b[im-1];
|
|
||||||
for(j=firstcol;j<=lastcol;j++) {
|
|
||||||
t1a[j] = t1a[j]+t1b[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[LEFT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][0] = t2a[j][0]+t2b[j][0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (gp[procid].neighbors[RIGHT] == -1) {
|
|
||||||
for(j=firstrow;j<=lastrow;j++) {
|
|
||||||
t2a[j][jm-1] = t2a[j][jm-1] + t2b[j][jm-1];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(i=firstrow;i<=lastrow;i++) {
|
|
||||||
t1a = (double *) t2a[i];
|
|
||||||
t1b = (double *) t2b[i];
|
|
||||||
for(iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
t1a[iindex] = t1a[iindex] + t1b[iindex];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (iday >= (long) outday3) {
|
|
||||||
endflag = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ((procid == MASTER) || (do_stats)) {
|
|
||||||
CLOCK(t1);
|
|
||||||
gp[procid].total_time = t1-gp[procid].total_time;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,112 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
EXTERN_ENV
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include "decs.h"
|
|
||||||
|
|
||||||
void subblock()
|
|
||||||
|
|
||||||
{
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
long k;
|
|
||||||
long xportion;
|
|
||||||
long xextra;
|
|
||||||
long yportion;
|
|
||||||
long yextra;
|
|
||||||
long my_num;
|
|
||||||
|
|
||||||
/* Determine starting coord and number of points to process in */
|
|
||||||
/* each direction */
|
|
||||||
|
|
||||||
for (i=0;i<numlev;i++) {
|
|
||||||
xportion = (jmx[i] - 2) / xprocs;
|
|
||||||
xextra = (jmx[i] - 2) % xprocs;
|
|
||||||
for (j=0;j<xprocs;j++) {
|
|
||||||
for (k=0;k<yprocs;k++) {
|
|
||||||
gp[k*xprocs+j].rel_num_x[i] = xportion;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
yportion = (imx[i] - 2) / yprocs;
|
|
||||||
yextra = (imx[i] - 2) % yprocs;
|
|
||||||
for (j=0;j<yprocs;j++) {
|
|
||||||
for (k=0;k<xprocs;k++) {
|
|
||||||
gp[j*xprocs+k].rel_num_y[i] = yportion;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (my_num=0;my_num<nprocs;my_num++) {
|
|
||||||
for (i=0;i<numlev;i++) {
|
|
||||||
gp[my_num].rlist[i] = 1;
|
|
||||||
gp[my_num].rljst[i] = 1;
|
|
||||||
gp[my_num].rlien[i] = gp[my_num].rlist[i] + gp[my_num].rel_num_y[i];
|
|
||||||
gp[my_num].rljen[i] = gp[my_num].rljst[i] + gp[my_num].rel_num_x[i];
|
|
||||||
gp[my_num].eist[i] = gp[my_num].rlist[i] + 1;
|
|
||||||
gp[my_num].oist[i] = gp[my_num].rlist[i];
|
|
||||||
gp[my_num].ejst[i] = gp[my_num].rljst[i] + 1;
|
|
||||||
gp[my_num].ojst[i] = gp[my_num].rljst[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (i=0;i<nprocs;i++) {
|
|
||||||
gp[i].neighbors[LEFT] = -1;
|
|
||||||
gp[i].neighbors[RIGHT] = -1;
|
|
||||||
gp[i].neighbors[UP] = -1;
|
|
||||||
gp[i].neighbors[DOWN] = -1;
|
|
||||||
gp[i].neighbors[UPLEFT] = -1;
|
|
||||||
gp[i].neighbors[UPRIGHT] = -1;
|
|
||||||
gp[i].neighbors[DOWNLEFT] = -1;
|
|
||||||
gp[i].neighbors[DOWNRIGHT] = -1;
|
|
||||||
if (i >= xprocs) {
|
|
||||||
gp[i].neighbors[UP] = i-xprocs;
|
|
||||||
}
|
|
||||||
if (i < nprocs-xprocs) {
|
|
||||||
gp[i].neighbors[DOWN] = i+xprocs;
|
|
||||||
}
|
|
||||||
if ((i % xprocs) > 0) {
|
|
||||||
gp[i].neighbors[LEFT] = i-1;
|
|
||||||
}
|
|
||||||
if ((i % xprocs) < (xprocs-1)) {
|
|
||||||
gp[i].neighbors[RIGHT] = i+1;
|
|
||||||
}
|
|
||||||
j = gp[i].neighbors[UP];
|
|
||||||
if (j != -1) {
|
|
||||||
if ((j % xprocs) > 0) {
|
|
||||||
gp[i].neighbors[UPLEFT] = j-1;
|
|
||||||
}
|
|
||||||
if ((j % xprocs) < (xprocs-1)) {
|
|
||||||
gp[i].neighbors[UPRIGHT] = j+1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
j = gp[i].neighbors[DOWN];
|
|
||||||
if (j != -1) {
|
|
||||||
if ((j % xprocs) > 0) {
|
|
||||||
gp[i].neighbors[DOWNLEFT] = j-1;
|
|
||||||
}
|
|
||||||
if ((j % xprocs) < (xprocs-1)) {
|
|
||||||
gp[i].neighbors[DOWNRIGHT] = j+1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (i=0;i<nprocs;i++) {
|
|
||||||
gp[i].rownum = i/xprocs;
|
|
||||||
gp[i].colnum = i%xprocs;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
# Build description for the OCEAN program.
TARGET = OCEAN
OBJS = jacobcalc.o laplacalc.o main.o multi.o slave1.o slave2.o

# Shared settings and rules are pulled in from the common configuration file.
include ../../../Makefile.config

# decs.h depends on decs.H, and every .c file depends on decs.h.  No recipes
# are given here; presumably Makefile.config supplies them -- confirm.
decs.h: decs.H
jacobcalc.c: decs.h
main.c: decs.h
slave1.c: decs.h
laplacalc.c: decs.h
multi.c : decs.h
slave2.c: decs.h
|
|
||||||
|
|
|
@ -1,40 +0,0 @@
|
||||||
shmid 0x2f44 shmvaddr 0x500000
|
|
||||||
|
|
||||||
Ocean simulation with W-cycle multigrid solver
|
|
||||||
Processors : 1
|
|
||||||
Grid size : 258 x 258
|
|
||||||
Grid resolution (meters) : 20000.00
|
|
||||||
Time between relaxations (seconds) : 28800
|
|
||||||
Error tolerance : 1e-07
|
|
||||||
|
|
||||||
MULTIGRID OUTPUTS
|
|
||||||
iter 71, level 8, residual norm 8.00274594e-08, work = 33.875
|
|
||||||
iter 31, level 8, residual norm 4.08062997e-08, work = 13.563
|
|
||||||
iter 22, level 8, residual norm 5.94548243e-08, work = 9.438
|
|
||||||
iter 12, level 8, residual norm 4.05573548e-08, work = 6.188
|
|
||||||
iter 2, level 8, residual norm 8.20209761e-08, work = 2.000
|
|
||||||
iter 5, level 8, residual norm 6.54258352e-08, work = 5.000
|
|
||||||
iter 3, level 8, residual norm 7.23930444e-08, work = 3.000
|
|
||||||
iter 12, level 8, residual norm 3.56346364e-08, work = 6.188
|
|
||||||
iter 2, level 8, residual norm 5.93080936e-08, work = 2.000
|
|
||||||
iter 4, level 8, residual norm 8.54596640e-08, work = 4.000
|
|
||||||
iter 11, level 8, residual norm 3.70162668e-08, work = 6.125
|
|
||||||
iter 13, level 8, residual norm 3.34750572e-08, work = 7.188
|
|
||||||
iter 12, level 8, residual norm 2.45353138e-08, work = 6.188
|
|
||||||
|
|
||||||
PROCESS STATISTICS
|
|
||||||
Total Multigrid Multigrid
|
|
||||||
Proc Time Time Fraction
|
|
||||||
0 79990673 21750269 0.272
|
|
||||||
Avg 79990673 21750269 0.272
|
|
||||||
Min 79990673 21750269 0.272
|
|
||||||
Max 79990673 21750269 0.272
|
|
||||||
|
|
||||||
TIMING INFORMATION
|
|
||||||
Start time : 972660212
|
|
||||||
Initialization finish time : 1017890583
|
|
||||||
Overall finish time : 1097881258
|
|
||||||
Total time with initialization : 125221046
|
|
||||||
Total time without initialization : 79990675
|
|
||||||
(excludes first timestep)
|
|
||||||
|
|
|
@ -1,262 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/* Compile-time constants shared by every translation unit. */
#define INPROCS    16    /* length of the spinflag[] array in multi_struct */
#define IMAX       258   /* first dimension of every grid array */
#define JMAX       258   /* second dimension of every grid array */
#define MAX_LEVELS 9     /* capacity of the per-level arrays */
#define MASTER     0     /* processor id that jacobcalc() compares pid against */
#define RED_ITER   0
#define BLACK_ITER 1
#define PAGE_SIZE  4096  /* byte length of the pad[] member of Global_Private */
|
|
||||||
|
|
||||||
EXTERN_ENV
|
|
||||||
|
|
||||||
/* Scalar state reached through the `global` pointer. */
struct global_struct {
    long id;
    long starttime;
    long trackstart;
    double psiai;
    double psibi;
};
|
|
||||||
|
|
||||||
struct fields_struct {
|
|
||||||
double psi[2][IMAX][JMAX];
|
|
||||||
double psim[2][IMAX][JMAX];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct fields2_struct {
|
|
||||||
double psium[IMAX][JMAX];
|
|
||||||
double psilm[IMAX][JMAX];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct wrk1_struct {
|
|
||||||
double psib[IMAX][JMAX];
|
|
||||||
double ga[IMAX][JMAX];
|
|
||||||
double gb[IMAX][JMAX];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct wrk3_struct {
|
|
||||||
double work1[2][IMAX][JMAX];
|
|
||||||
double work2[IMAX][JMAX];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct wrk2_struct {
|
|
||||||
double work3[IMAX][JMAX];
|
|
||||||
double f[IMAX];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct wrk4_struct {
|
|
||||||
double work4[2][IMAX][JMAX];
|
|
||||||
double work5[2][IMAX][JMAX];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct wrk6_struct {
|
|
||||||
double work6[IMAX][JMAX];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct wrk5_struct {
|
|
||||||
double work7[2][IMAX][JMAX];
|
|
||||||
double temparray[2][IMAX][JMAX];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct frcng_struct {
|
|
||||||
double tauz[IMAX][JMAX];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct iter_struct {
|
|
||||||
long notdone;
|
|
||||||
double work8[IMAX][JMAX];
|
|
||||||
double work9[IMAX][JMAX];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct guess_struct {
|
|
||||||
double oldga[IMAX][JMAX];
|
|
||||||
double oldgb[IMAX][JMAX];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct multi_struct {
|
|
||||||
double q_multi[MAX_LEVELS][IMAX][JMAX];
|
|
||||||
double rhs_multi[MAX_LEVELS][IMAX][JMAX];
|
|
||||||
double err_multi;
|
|
||||||
long numspin;
|
|
||||||
long spinflag[INPROCS];
|
|
||||||
};
|
|
||||||
|
|
||||||
struct locks_struct {
|
|
||||||
LOCKDEC(idlock)
|
|
||||||
LOCKDEC(psiailock)
|
|
||||||
LOCKDEC(psibilock)
|
|
||||||
LOCKDEC(donelock)
|
|
||||||
LOCKDEC(error_lock)
|
|
||||||
LOCKDEC(bar_lock)
|
|
||||||
};
|
|
||||||
|
|
||||||
struct bars_struct {
|
|
||||||
#if defined(MULTIPLE_BARRIERS)
|
|
||||||
BARDEC(iteration)
|
|
||||||
BARDEC(gsudn)
|
|
||||||
BARDEC(p_setup)
|
|
||||||
BARDEC(p_redph)
|
|
||||||
BARDEC(p_soln)
|
|
||||||
BARDEC(p_subph)
|
|
||||||
BARDEC(sl_prini)
|
|
||||||
BARDEC(sl_psini)
|
|
||||||
BARDEC(sl_onetime)
|
|
||||||
BARDEC(sl_phase_1)
|
|
||||||
BARDEC(sl_phase_2)
|
|
||||||
BARDEC(sl_phase_3)
|
|
||||||
BARDEC(sl_phase_4)
|
|
||||||
BARDEC(sl_phase_5)
|
|
||||||
BARDEC(sl_phase_6)
|
|
||||||
BARDEC(sl_phase_7)
|
|
||||||
BARDEC(sl_phase_8)
|
|
||||||
BARDEC(sl_phase_9)
|
|
||||||
BARDEC(sl_phase_10)
|
|
||||||
BARDEC(error_barrier)
|
|
||||||
#else
|
|
||||||
BARDEC(barrier)
|
|
||||||
#endif
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Pointers to the shared structures; declared here, defined in another file. */
extern struct global_struct *global;
extern struct fields_struct *fields;
extern struct fields2_struct *fields2;
extern struct wrk1_struct *wrk1;
extern struct wrk3_struct *wrk3;
extern struct wrk2_struct *wrk2;
extern struct wrk4_struct *wrk4;
extern struct wrk6_struct *wrk6;
extern struct wrk5_struct *wrk5;
extern struct frcng_struct *frcng;
extern struct iter_struct *iter;
extern struct guess_struct *guess;
extern struct multi_struct *multi;
extern struct locks_struct *locks;
extern struct bars_struct *bars;
|
|
||||||
|
|
||||||
extern double eig2;
|
|
||||||
extern double ysca;
|
|
||||||
extern long jmm1;
|
|
||||||
extern double pi;
|
|
||||||
extern double t0;
|
|
||||||
|
|
||||||
extern long *procmap;
|
|
||||||
extern long xprocs;
|
|
||||||
extern long yprocs;
|
|
||||||
|
|
||||||
extern long numlev;
|
|
||||||
extern long imx[MAX_LEVELS];
|
|
||||||
extern long jmx[MAX_LEVELS];
|
|
||||||
extern double lev_res[MAX_LEVELS];
|
|
||||||
extern double lev_tol[MAX_LEVELS];
|
|
||||||
extern double maxwork;
|
|
||||||
extern long minlevel;
|
|
||||||
extern double outday0;
|
|
||||||
extern double outday1;
|
|
||||||
extern double outday2;
|
|
||||||
extern double outday3;
|
|
||||||
|
|
||||||
extern long nprocs;
|
|
||||||
|
|
||||||
extern double h1;
|
|
||||||
extern double h3;
|
|
||||||
extern double h;
|
|
||||||
extern double lf;
|
|
||||||
extern double res;
|
|
||||||
extern double dtau;
|
|
||||||
extern double f0;
|
|
||||||
extern double beta;
|
|
||||||
extern double gpr;
|
|
||||||
extern long im;
|
|
||||||
extern long jm;
|
|
||||||
extern long do_stats;
|
|
||||||
extern long do_output;
|
|
||||||
extern long *multi_times;
|
|
||||||
extern long *total_times;
|
|
||||||
extern double factjacob;
|
|
||||||
extern double factlap;
|
|
||||||
|
|
||||||
struct Global_Private {
|
|
||||||
char pad[PAGE_SIZE];
|
|
||||||
double multi_time;
|
|
||||||
double total_time;
|
|
||||||
long rel_start_x[MAX_LEVELS];
|
|
||||||
long rel_start_y[MAX_LEVELS];
|
|
||||||
long rel_num_x[MAX_LEVELS];
|
|
||||||
long rel_num_y[MAX_LEVELS];
|
|
||||||
long eist[MAX_LEVELS];
|
|
||||||
long ejst[MAX_LEVELS];
|
|
||||||
long oist[MAX_LEVELS];
|
|
||||||
long ojst[MAX_LEVELS];
|
|
||||||
long eiest[MAX_LEVELS];
|
|
||||||
long ejest[MAX_LEVELS];
|
|
||||||
long oiest[MAX_LEVELS];
|
|
||||||
long ojest[MAX_LEVELS];
|
|
||||||
long rlist[MAX_LEVELS];
|
|
||||||
long rljst[MAX_LEVELS];
|
|
||||||
long rlien[MAX_LEVELS];
|
|
||||||
long rljen[MAX_LEVELS];
|
|
||||||
long iist[MAX_LEVELS];
|
|
||||||
long ijst[MAX_LEVELS];
|
|
||||||
long iien[MAX_LEVELS];
|
|
||||||
long ijen[MAX_LEVELS];
|
|
||||||
long pist[MAX_LEVELS];
|
|
||||||
long pjst[MAX_LEVELS];
|
|
||||||
long pien[MAX_LEVELS];
|
|
||||||
long pjen[MAX_LEVELS];
|
|
||||||
};
|
|
||||||
|
|
||||||
extern struct Global_Private *gp;
|
|
||||||
|
|
||||||
extern double i_int_coeff[MAX_LEVELS];
|
|
||||||
extern double j_int_coeff[MAX_LEVELS];
|
|
||||||
extern long minlev;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* jacobcalc.C
|
|
||||||
*/
|
|
||||||
void jacobcalc(double x[IMAX][JMAX], double y[IMAX][JMAX], double z[IMAX][JMAX], long pid, long firstrow, long lastrow, long firstcol, long lastcol, long numrows, long numcols);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* laplacalc.C
|
|
||||||
*/
|
|
||||||
void laplacalc(double x[IMAX][JMAX], double z[IMAX][JMAX], long firstrow, long lastrow, long firstcol, long lastcol, long numrows, long numcols);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* main.C
|
|
||||||
*/
|
|
||||||
long log_2(long number);
|
|
||||||
void printerr(char *s);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* multi.C
|
|
||||||
*/
|
|
||||||
void multig(long my_id);
|
|
||||||
void relax(long k, double *err, long color, long my_num);
|
|
||||||
void rescal(long kf, long my_num);
|
|
||||||
void intadd(long kc, long my_num);
|
|
||||||
void putz(long k, long my_num);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* slave1.C
|
|
||||||
*/
|
|
||||||
void slave(void);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* slave2.C
|
|
||||||
*/
|
|
||||||
void slave2(long procid, long firstrow, long lastrow, long numrows, long firstcol, long lastcol, long numcols);
|
|
|
@ -1,98 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/* does the arakawa jacobian calculation (of the x and y matrices,
|
|
||||||
putting the results in the z matrix) for a subblock. */
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include "decs.h"
|
|
||||||
|
|
||||||
void jacobcalc(double x[IMAX][JMAX], double y[IMAX][JMAX], double z[IMAX][JMAX], long pid, long firstrow, long lastrow, long firstcol, long lastcol, long numrows, long numcols)
|
|
||||||
{
|
|
||||||
double f1;
|
|
||||||
double f2;
|
|
||||||
double f3;
|
|
||||||
double f4;
|
|
||||||
double f5;
|
|
||||||
double f6;
|
|
||||||
double f7;
|
|
||||||
double f8;
|
|
||||||
long iindex;
|
|
||||||
long indexp1;
|
|
||||||
long indexm1;
|
|
||||||
long im1;
|
|
||||||
long ip1;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
if (pid == MASTER) {
|
|
||||||
z[0][0]=0.0;
|
|
||||||
}
|
|
||||||
if (pid == nprocs-xprocs) {
|
|
||||||
z[im-1][0]=0.0;
|
|
||||||
}
|
|
||||||
if (pid == xprocs-1) {
|
|
||||||
z[0][jm-1]=0.0;
|
|
||||||
}
|
|
||||||
if (pid == nprocs-1) {
|
|
||||||
z[im-1][jm-1]=0.0;
|
|
||||||
}
|
|
||||||
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
indexp1 = iindex+1;
|
|
||||||
indexm1 = iindex-1;
|
|
||||||
for (i=firstrow;i<=lastrow;i++) {
|
|
||||||
ip1 = i+1;
|
|
||||||
im1 = i-1;
|
|
||||||
f1 = (y[i][indexm1]+y[ip1][indexm1]-y[i][indexp1]-y[ip1][indexp1])*
|
|
||||||
(x[ip1][iindex]-x[i][iindex]);
|
|
||||||
f2 = (y[im1][indexm1]+y[i][indexm1]-y[im1][indexp1]-y[i][indexp1])*
|
|
||||||
(x[i][iindex]-x[im1][iindex]);
|
|
||||||
f3 = (y[ip1][iindex]+y[ip1][indexp1]-y[im1][iindex]-y[im1][indexp1])*
|
|
||||||
(x[i][indexp1]-x[i][iindex]);
|
|
||||||
f4 = (y[ip1][indexm1]+y[ip1][iindex]-y[im1][indexm1]-y[im1][iindex])*
|
|
||||||
(x[i][iindex]-x[i][indexm1]);
|
|
||||||
f5 = (y[ip1][iindex]-y[i][indexp1])*(x[ip1][indexp1]-x[i][iindex]);
|
|
||||||
f6 = (y[i][indexm1]-y[im1][iindex])*(x[i][iindex]-x[im1][indexm1]);
|
|
||||||
f7 = (y[i][indexp1]-y[im1][iindex])*(x[im1][indexp1]-x[i][iindex]);
|
|
||||||
f8 = (y[ip1][iindex]-y[i][indexm1])*(x[i][iindex]-x[ip1][indexm1]);
|
|
||||||
|
|
||||||
z[i][iindex] = factjacob*(f1+f2+f3+f4+f5+f6+f7+f8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (firstrow == 1) {
|
|
||||||
for (j=firstcol;j<=lastcol;j++) {
|
|
||||||
z[0][j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ((firstrow+numrows) == im-1) {
|
|
||||||
for (j=firstcol;j<=lastcol;j++) {
|
|
||||||
z[im-1][j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (firstcol == 1) {
|
|
||||||
for (j=firstrow;j<=lastrow;j++) {
|
|
||||||
z[j][0] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ((firstcol+numcols) == jm-1) {
|
|
||||||
for (j=firstrow;j<=lastrow;j++) {
|
|
||||||
z[j][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,68 +0,0 @@
|
||||||
/*************************************************************************/
|
|
||||||
/* */
|
|
||||||
/* Copyright (c) 1994 Stanford University */
|
|
||||||
/* */
|
|
||||||
/* All rights reserved. */
|
|
||||||
/* */
|
|
||||||
/* Permission is given to use, copy, and modify this software for any */
|
|
||||||
/* non-commercial purpose as long as this copyright notice is not */
|
|
||||||
/* removed. All other uses, including redistribution in whole or in */
|
|
||||||
/* part, are forbidden without prior written permission. */
|
|
||||||
/* */
|
|
||||||
/* This software is provided with absolutely no warranty and no */
|
|
||||||
/* support. */
|
|
||||||
/* */
|
|
||||||
/*************************************************************************/
|
|
||||||
|
|
||||||
/* **************************************************************
|
|
||||||
end of subroutine jacobcalc
|
|
||||||
**************************************************************
|
|
||||||
|
|
||||||
performs the laplacian calculation for a subblock. */
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include "decs.h"
|
|
||||||
|
|
||||||
void laplacalc(double x[IMAX][JMAX], double z[IMAX][JMAX], long firstrow, long lastrow, long firstcol, long lastcol, long numrows, long numcols)
|
|
||||||
{
|
|
||||||
long iindex;
|
|
||||||
long indexp1;
|
|
||||||
long indexm1;
|
|
||||||
long ip1;
|
|
||||||
long im1;
|
|
||||||
long i;
|
|
||||||
long j;
|
|
||||||
|
|
||||||
for (iindex=firstcol;iindex<=lastcol;iindex++) {
|
|
||||||
indexp1 = iindex+1;
|
|
||||||
indexm1 = iindex-1;
|
|
||||||
for (i=firstrow;i<=lastrow;i++) {
|
|
||||||
ip1 = i+1;
|
|
||||||
im1 = i-1;
|
|
||||||
z[i][iindex] = factlap*(x[ip1][iindex]+x[im1][iindex]+x[i][indexp1]+
|
|
||||||
x[i][indexm1]-4.*x[i][iindex]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (firstrow == 1) {
|
|
||||||
for (j=firstcol;j<=lastcol;j++) {
|
|
||||||
z[0][j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ((firstrow+numrows) == im-1) {
|
|
||||||
for (j=firstcol;j<=lastcol;j++) {
|
|
||||||
z[im-1][j] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (firstcol == 1) {
|
|
||||||
for (j=firstrow;j<=lastrow;j++) {
|
|
||||||
z[j][0] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ((firstcol+numcols) == jm-1) {
|
|
||||||
for (j=firstrow;j<=lastrow;j++) {
|
|
||||||
z[j][jm-1] = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue