diff --git a/ext/mcpat/ARM_A9.xml b/ext/mcpat/ARM_A9.xml
new file mode 100644
index 000000000..9289b6644
--- /dev/null
+++ b/ext/mcpat/ARM_A9.xml
@@ -0,0 +1,415 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mcpat/ARM_A9_2000.xml b/ext/mcpat/ARM_A9_2000.xml
new file mode 100644
index 000000000..c040e1be3
--- /dev/null
+++ b/ext/mcpat/ARM_A9_2000.xml
@@ -0,0 +1,463 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mcpat/ARM_A9_800.xml b/ext/mcpat/ARM_A9_800.xml
new file mode 100644
index 000000000..fd7b21438
--- /dev/null
+++ b/ext/mcpat/ARM_A9_800.xml
@@ -0,0 +1,463 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mcpat/Alpha21364.xml b/ext/mcpat/Alpha21364.xml
new file mode 100644
index 000000000..c40c4f50b
--- /dev/null
+++ b/ext/mcpat/Alpha21364.xml
@@ -0,0 +1,456 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mcpat/Niagara1.xml b/ext/mcpat/Niagara1.xml
new file mode 100644
index 000000000..ae748e246
--- /dev/null
+++ b/ext/mcpat/Niagara1.xml
@@ -0,0 +1,442 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mcpat/Niagara1_sharing.xml b/ext/mcpat/Niagara1_sharing.xml
new file mode 100644
index 000000000..93531aebd
--- /dev/null
+++ b/ext/mcpat/Niagara1_sharing.xml
@@ -0,0 +1,400 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mcpat/Niagara1_sharing_DC.xml b/ext/mcpat/Niagara1_sharing_DC.xml
new file mode 100644
index 000000000..574ec8157
--- /dev/null
+++ b/ext/mcpat/Niagara1_sharing_DC.xml
@@ -0,0 +1,442 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mcpat/Niagara1_sharing_SBT.xml b/ext/mcpat/Niagara1_sharing_SBT.xml
new file mode 100644
index 000000000..32eeca382
--- /dev/null
+++ b/ext/mcpat/Niagara1_sharing_SBT.xml
@@ -0,0 +1,455 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mcpat/Niagara1_sharing_ST.xml b/ext/mcpat/Niagara1_sharing_ST.xml
new file mode 100644
index 000000000..3f0573fe9
--- /dev/null
+++ b/ext/mcpat/Niagara1_sharing_ST.xml
@@ -0,0 +1,443 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/ext/mcpat/Niagara2.xml b/ext/mcpat/Niagara2.xml
new file mode 100644
index 000000000..c7e311ff8
--- /dev/null
+++ b/ext/mcpat/Niagara2.xml
@@ -0,0 +1,438 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mcpat/Penryn.xml b/ext/mcpat/Penryn.xml
new file mode 100644
index 000000000..fe9715b77
--- /dev/null
+++ b/ext/mcpat/Penryn.xml
@@ -0,0 +1,456 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mcpat/README b/ext/mcpat/README
new file mode 100644
index 000000000..4887b1037
--- /dev/null
+++ b/ext/mcpat/README
@@ -0,0 +1,226 @@
+ __ __ ____ _ _____ ____ _
+| \/ | ___| _ \ / \|_ _| | __ ) ___| |_ __ _
+| |\/| |/ __| |_) / _ \ | | | _ \ / _ \ __|/ _` |
+| | | | (__| __/ ___ \| | | |_) | __/ |_| (_| |
+|_| |_|\___|_| /_/ \_\_| |____/ \___|\__|\__,_|
+
+McPAT: Multicore Power, Area, and Timing
+Current version 0.8Beta
+===============================
+
+McPAT is an architectural modeling tool for chip multiprocessors (CMP)
+The main focus of McPAT is accurate power and area
+modeling, and a target clock rate is used as a design constraint.
+McPAT performs automatic extensive search to find optimal designs
+that satisfy the target clock frequency.
+
+For complete documentation of the McPAT, please refer McPAT 1.0
+technical report and the following paper,
+"McPAT: An Integrated Power, Area, and Timing Modeling
+ Framework for Multicore and Manycore Architectures",
+that appears in MICRO 2009. Please cite the paper, if you use
+McPAT in your work. The bibtex entry is provided below for your convenience.
+
+ @inproceedings{mcpat:micro,
+ author = {Sheng Li and Jung Ho Ahn and Richard D. Strong and Jay B. Brockman and Dean M. Tullsen and Norman P. Jouppi},
+ title = "{McPAT: An Integrated Power, Area, and Timing Modeling Framework for Multicore and Manycore Architectures}",
+ booktitle = {MICRO 42: Proceedings of the 42nd Annual IEEE/ACM International Symposium on Microarchitecture},
+ year = {2009},
+ pages = {469--480},
+ }
+
+Current McPAT is in its beta release.
+List of features of beta release
+===============================
+The following are the list of features supported by the tool.
+
+* Power, area, and timing models for CMPs with:
+ Inorder cores both single and multithreaded
+ OOO cores both single and multithreaded
+ Shared/coherent caches with directory hardware:
+ including directory cache, shadowed tag directory
+ and static bank mapped tag directory
+ Network-on-Chip
+ On-chip memory controllers
+
+* Internal models are based on real modern processors:
+ Inorder models are based on Sun Niagara family
+ OOO models are based on Intel P6 for reservation
+ station based OOO cores, and on Intel Netburst and
+ Alpha 21264 for physical register file based OOO cores.
+
+* Leakage power modeling considers both sub-threshold leakage
+ and gate leakage power. The impact of operating temperature
+ on both leakage power are considered. Longer channel devices
+ that can reduce leakage significantly with modest performance
+ penalty are also modeled.
+
+* McPAT supports automatic extensive search to find optimal designs
+ that satisfy the target clock frequency. The timing constraint
+ include both throughput and latency.
+
+* Interconnect model with different delay, power, and area
+ properties, as well as both the aggressive and conservative
+ interconnect projections on wire technologies.
+
+* All process specific values used by the McPAT are obtained
+ from ITRS and currently, the McPAT supports 90nm, 65nm, 45nm,
+ 32nm, and 22nm technology nodes. At 32nm and 22nm nodes, SOI
+ and DG devices are used. After 45nm, Hi-K metal gates are used.
+
+How to use the tool?
+====================
+
+McPAT takes input parameters from an XML-based interface,
+then it computes area and peak power of the
+Please note that the peak power is the absolute worst case power,
+which could be even higher than TDP.
+
+1. Steps to run McPAT:
+ -> define the target processor using inorder.xml or OOO.xml
+ -> run the "mcpat" binary:
+ ./mcpat -infile <*.xml> -print_level < level of detailed output>
+ ./mcpat -h (or mcpat --help) will show the quick help message.
+
+ Rather than being hardwired to certain simulators, McPAT
+ uses an XML-based interface to enable easy integration
+ with various performance simulators. Our collaborator,
+ Richard Strong, at University of California, San Diego,
+ designed an experimental parser for the M5 simulator, aiming for
+ streamlining the integration of McPAT and M5. Please check the M5
+ repository/ for the latest version of the parser.
+
+2. Optimize:
+ McPAT will try its best to satisfy the target clock rate.
+ When it cannot find a valid solution, it gives out warnings,
+ while still giving a solution that is closest to the timing
+ constraints and calculate power based on it. The optimization
+ will lead to larger power/area numbers for target higher clock
+ rate. McPAT also provides the option "-opt_for_clk" to turn on
+ ("-opt_for_clk 1") and off this strict optimization for the
+ timing constraint. When it is off, McPAT always optimize
+ component for ED^2P without worrying about meeting the
+ target clock frequency. By turning it off, the computation time
+ can be reduced, which suites for situations where target clock rate
+ is conservative.
+
+3. The output:
+ McPAT outputs results in a hierarchical manner. Increasing
+ the "-print_level" will show detailed results inside each
+ component. For each component, major parts are shown, and associated
+ pipeline registers/control logic are added up in total area/power of each
+ components. In general, McPAT does not model the area/overhead of the pad
+ frame used in a processor die.
+
+4. How to use the XML interface for McPAT
+ 4.1 Set up the parameters
+ Parameters of target designs need to be set in the *.xml file for
+ entries taged as "param". McPAT have very detailed parameter settings.
+ please remove the structure parameter from the file if you want
+ to use the default values. Otherwise, the parameters in the xml file
+ will override the default values.
+
+ 4.2 Pass the statistics
+ There are two options to get the correct stats: a) the performance
+ simulator can capture all the stats in detail and pass them to McPAT;
+ b). Performance simulator can only capture partial stats and pass
+ them to McPAT, while McPAT can reason about the complete stats using
+ the partial information and the configuration. Therefore, there are
+ some overlap for the stats.
+
+ 4.3 Interface XML file structures (PLEASE READ!)
+ The XML is hierarchical from processor level to micro-architecture
+ level. McPAT support both heterogeneous and homogeneous manycore processors.
+
+ 1). For heterogeneous processor setup, each component (core, NoC, cache,
+ and etc) must have its own instantiations (core0, core1, ..., coreN).
+ Each instantiation will have different parameters as well as its stats.
+ Thus, the XML file must have multiple "instantiation" of each type of
+ heterogeneous components and the corresponding hetero flags must be set
+ in the XML file. Then state in the XML should be the stats of "a" instantiation
+ (e.g. "a" cores). The reported runtime dynamic is of a single instantiation
+ (e.g. "a" cores). Since the stats for each (e.g. "a" cores) may be different,
+ we will see a whole list of (e.g. "a" cores) with different dynamic power,
+ and total power is just a sum of them.
+
+ 2). For homogeneous processors, the same method for heterogeneous can
+ also be used by treating all homogeneous instantiations as heterogeneous.
+ However, a preferred approach is to use a single representative for all
+ the same components (e.g. core0 to represent all cores) and set the
+ processor to have homogeneous components (e.g. ). Thus, the XML file only has one instantiation to represent
+ all others with the same architectural parameters. The corresponding homo
+ flags must be set in the XML file. Then, the stats in the XML should be
+ the aggregated stats of the sum of all instantiations (e.g. aggregated stats
+ of all cores). In the final results, McPAT will only report a single
+ instantiation of each type of component, and the reported runtime dynamic power
+ is the sum of all instantiations of the same type. This approach can run fast
+ and use much less memory.
+
+5. Guide for integrating McPAT into performance simulators and bypassing the XML interface
+ The detailed work flow of McPAT has two phases: the initialization phase and
+ the computation phase. Specifically, in order to start the initialization phase a
+ user specifies static configurations, including parameters at all three levels,
+ namely, architectural, circuit, and technology levels. During the initialization
+ phase, McPAT will generate the internal chip representation using the configurations
+ set by the user.
+ The computation phase of McPAT is called by McPAT or the performance simulator
+ during simulation to generate runtime power numbers. Before calling McPAT to
+ compute runtime power numbers, the performance simulator needs to pass the
+ statistics, namely, the activity factors of each individual components to McPAT
+ via the XML interface.
+ The initialization phase is very time-consuming, since it will repeat many
+ times until valid configurations are found or the possible configurations are
+ exhausted. To reduce the overhead, a user can let the simulator to call McPAT
+ directly for computation phase and only call initialization phase once at the
+ beginning of simulation. In this case, the XML interface file is bypassed,
+ please refer to processor.cc to see how the two phases are called.
+
+6. Sample input files:
+ This package provide sample XML files for validating target processors. Please find the
+ enclosed Niagara1.xml (for the Sun Niagara1 processor), Niagara2.xml (for the Sun Niagara2
+ processor), Alpha21364.xml (for the Alpha21364 processor), and Xeon.xml (for the Intel
+ Xeon Tulsa processor).
+
+ Special instructions for using Xeon.xml:
+ McPAT uses ITRS device types including HP, LSTP, and LOP. Although most
+ designs follow ITRS projections, there are designs with special technologies.
+ For example, the 65nm Xeon Tulsa processor uses 1.25 V rather than 1.1V
+ for the core voltage domain, which results in the changes in threshold voltage,
+ leakage current density, saturation current, and etc, besides the different
+ supply voltage. We use MASTAR to match the special technology as used in Xeon
+ core domain. Therefore, in order to generate accurate results of Xeon
+ Tulsa cores, users need to do make TAR=mcpatXeonCore and use the generated
+ special executable. The L3 cache and buses must be computed using standard
+ ITRS technology.
+
+
+====================
+McPAT is in its beginning stage. We are still improving
+the tool and refining the code. Please come back to its website
+for newer versions. If you have any comments,
+questions, or suggestions, please write to us.
+
+Version history and roadmap
+
+McPAT Alpha: released Sep. 2009 Experimental release
+McPAT Beta (0.6): released Nov. 2009 New code base and technology base
+McPAT Beta (0.7): released May. 2010 Added various new models,
+ including long channel devices, buses model; together
+ with bug fixes and extensive code optimization to reduce
+ memory usage.
+McPAT Beta (0.8): released Aug. 2010 Added various new models,
+ including on-chip 10Gb ethernet units, PCIe, and flash controllers.
+Next major release:
+McPAT 1.0: including advance power-saving states
+
+Future releases may include the modeling of embedded low-power
+processors as well as vector processors and GPGPUs.
+
+
+Sheng Li
+sheng.li@hp.com
+
+
+
+
diff --git a/ext/mcpat/XML_Parse.cc b/ext/mcpat/XML_Parse.cc
new file mode 100644
index 000000000..ae3ee6f17
--- /dev/null
+++ b/ext/mcpat/XML_Parse.cc
@@ -0,0 +1,1798 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+#include
+#include
+
+#include "XML_Parse.h"
+#include "xmlParser.h"
+
+using namespace std;
+
+void ParseXML::parse(char* filepath)
+{
+ unsigned int i,j,k,m,n;
+ unsigned int NumofCom_4;
+ unsigned int itmp;
+ //Initialize all structures
+ ParseXML::initialize();
+
+ // this open and parse the XML file:
+ XMLNode xMainNode=XMLNode::openFileHelper(filepath,"component"); //the 'component' in the first layer
+
+ XMLNode xNode2=xMainNode.getChildNode("component"); // the 'component' in the second layer
+ //get all params in the second layer
+ itmp=xNode2.nChildNode("param");
+ for(i=0; iOrderofComponents_3layer)
+ {
+ //___________________________get all system.core0-n________________________________________________
+ if (sys.homogeneous_cores==1) OrderofComponents_3layer=0;
+ else OrderofComponents_3layer=sys.number_of_cores-1;
+ for (i=0; i<=OrderofComponents_3layer; i++)
+ {
+ xNode3=xNode2.getChildNode("component",i);
+ if (xNode3.isEmpty()==1) {
+ printf("The value of homogeneous_cores or number_of_cores is not correct!");
+ exit(0);
+ }
+ else{
+ if (strstr(xNode3.getAttribute("name"),"core")!=NULL)
+ {
+ { //For cpu0-cpui
+ //Get all params with system.core?
+ itmp=xNode3.nChildNode("param");
+ for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1;
+ xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
+ if (xNode3.isEmpty()==1) {
+ printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
+ exit(0);
+ }
+ if (strstr(xNode3.getAttribute("id"),"system.mem")!=NULL)
+ {
+
+ itmp=xNode3.nChildNode("param");
+ for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1;
+ xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
+ if (xNode3.isEmpty()==1) {
+ printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
+ exit(0);
+ }
+ if (strstr(xNode3.getAttribute("id"),"system.mc")!=NULL)
+ {
+ itmp=xNode3.nChildNode("param");
+ for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1;
+ xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
+ if (xNode3.isEmpty()==1) {
+ printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
+ exit(0);
+ }
+ if (strstr(xNode3.getAttribute("id"),"system.niu")!=NULL)
+ {
+ itmp=xNode3.nChildNode("param");
+ for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1;
+ xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
+ if (xNode3.isEmpty()==1) {
+ printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
+ exit(0);
+ }
+ if (strstr(xNode3.getAttribute("id"),"system.pcie")!=NULL)
+ {
+ itmp=xNode3.nChildNode("param");
+ for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1;
+ xNode3=xNode2.getChildNode("component",OrderofComponents_3layer);
+ if (xNode3.isEmpty()==1) {
+ printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!");
+ exit(0);
+ }
+ if (strstr(xNode3.getAttribute("id"),"system.flashc")!=NULL)
+ {
+ itmp=xNode3.nChildNode("param");
+ for(k=0; k
+#include
+
+#include
+
+#include "xmlParser.h"
+using namespace std;
+
+/*
+void myfree(char *t); // {free(t);}
+ToXMLStringTool tx,tx2;
+*/
+//all subnodes at the level of system.core(0-n)
+//cache_policy is added into cache property arrays;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
+
+typedef struct{
+ int prediction_width;
+ char prediction_scheme[20];
+ int predictor_size;
+ int predictor_entries;
+ int local_predictor_size[20];
+ int local_predictor_entries;
+ int global_predictor_entries;
+ int global_predictor_bits;
+ int chooser_predictor_entries;
+ int chooser_predictor_bits;
+ double predictor_accesses;
+} predictor_systemcore;
+typedef struct{
+ int number_entries;
+ int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
+ double total_hits;
+ double total_accesses;
+ double total_misses;
+ double conflicts;
+} itlb_systemcore;
+typedef struct{
+ //params
+ double icache_config[20];
+ int buffer_sizes[20];
+ int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
+ //stats
+ double total_accesses;
+ double read_accesses;
+ double read_misses;
+ double replacements;
+ double read_hits;
+ double total_hits;
+ double total_misses;
+ double miss_buffer_access;
+ double fill_buffer_accesses;
+ double prefetch_buffer_accesses;
+ double prefetch_buffer_writes;
+ double prefetch_buffer_reads;
+ double prefetch_buffer_hits;
+ double conflicts;
+} icache_systemcore;
+typedef struct{
+ //params
+ int number_entries;
+ int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
+ //stats
+ double total_accesses;
+ double read_accesses;
+ double write_accesses;
+ double write_hits;
+ double read_hits;
+ double read_misses;
+ double write_misses;
+ double total_hits;
+ double total_misses;
+ double conflicts;
+} dtlb_systemcore;
+typedef struct{
+ //params
+ double dcache_config[20];
+ int buffer_sizes[20];
+ int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
+ //stats
+ double total_accesses;
+ double read_accesses;
+ double write_accesses;
+ double total_hits;
+ double total_misses;
+ double read_hits;
+ double write_hits;
+ double read_misses;
+ double write_misses;
+ double replacements;
+ double write_backs;
+ double miss_buffer_access;
+ double fill_buffer_accesses;
+ double prefetch_buffer_accesses;
+ double prefetch_buffer_writes;
+ double prefetch_buffer_reads;
+ double prefetch_buffer_hits;
+ double wbb_writes;
+ double wbb_reads;
+ double conflicts;
+} dcache_systemcore;
+typedef struct{
+ //params
+ int BTB_config[20];
+ //stats
+ double total_accesses;
+ double read_accesses;
+ double write_accesses;
+ double total_hits;
+ double total_misses;
+ double read_hits;
+ double write_hits;
+ double read_misses;
+ double write_misses;
+ double replacements;
+} BTB_systemcore;
+typedef struct{
+ //all params at the level of system.core(0-n)
+ int clock_rate;
+ bool opt_local;
+ bool x86;
+ int machine_bits;
+ int virtual_address_width;
+ int physical_address_width;
+ int opcode_width;
+ int micro_opcode_width;
+ int instruction_length;
+ int machine_type;
+ int internal_datapath_width;
+ int number_hardware_threads;
+ int fetch_width;
+ int number_instruction_fetch_ports;
+ int decode_width;
+ int issue_width;
+ int peak_issue_width;
+ int commit_width;
+ int pipelines_per_core[20];
+ int pipeline_depth[20];
+ char FPU[20];
+ char divider_multiplier[20];
+ int ALU_per_core;
+ double FPU_per_core;
+ int MUL_per_core;
+ int instruction_buffer_size;
+ int decoded_stream_buffer_size;
+ int instruction_window_scheme;
+ int instruction_window_size;
+ int fp_instruction_window_size;
+ int ROB_size;
+ int archi_Regs_IRF_size;
+ int archi_Regs_FRF_size;
+ int phy_Regs_IRF_size;
+ int phy_Regs_FRF_size;
+ int rename_scheme;
+ int register_windows_size;
+ char LSU_order[20];
+ int store_buffer_size;
+ int load_buffer_size;
+ int memory_ports;
+ char Dcache_dual_pump[20];
+ int RAS_size;
+ int fp_issue_width;
+ int prediction_width;
+ int number_of_BTB;
+ int number_of_BPT;
+
+ //all stats at the level of system.core(0-n)
+ double total_instructions;
+ double int_instructions;
+ double fp_instructions;
+ double branch_instructions;
+ double branch_mispredictions;
+ double committed_instructions;
+ double committed_int_instructions;
+ double committed_fp_instructions;
+ double load_instructions;
+ double store_instructions;
+ double total_cycles;
+ double idle_cycles;
+ double busy_cycles;
+ double instruction_buffer_reads;
+ double instruction_buffer_write;
+ double ROB_reads;
+ double ROB_writes;
+ double rename_accesses;
+ double fp_rename_accesses;
+ double rename_reads;
+ double rename_writes;
+ double fp_rename_reads;
+ double fp_rename_writes;
+ double inst_window_reads;
+ double inst_window_writes;
+ double inst_window_wakeup_accesses;
+ double inst_window_selections;
+ double fp_inst_window_reads;
+ double fp_inst_window_writes;
+ double fp_inst_window_wakeup_accesses;
+ double fp_inst_window_selections;
+ double archi_int_regfile_reads;
+ double archi_float_regfile_reads;
+ double phy_int_regfile_reads;
+ double phy_float_regfile_reads;
+ double phy_int_regfile_writes;
+ double phy_float_regfile_writes;
+ double archi_int_regfile_writes;
+ double archi_float_regfile_writes;
+ double int_regfile_reads;
+ double float_regfile_reads;
+ double int_regfile_writes;
+ double float_regfile_writes;
+ double windowed_reg_accesses;
+ double windowed_reg_transports;
+ double function_calls;
+ double context_switches;
+ double ialu_accesses;
+ double fpu_accesses;
+ double mul_accesses;
+ double cdb_alu_accesses;
+ double cdb_mul_accesses;
+ double cdb_fpu_accesses;
+ double load_buffer_reads;
+ double load_buffer_writes;
+ double load_buffer_cams;
+ double store_buffer_reads;
+ double store_buffer_writes;
+ double store_buffer_cams;
+ double store_buffer_forwards;
+ double main_memory_access;
+ double main_memory_read;
+ double main_memory_write;
+ double pipeline_duty_cycle;
+
+ double IFU_duty_cycle ;
+ double BR_duty_cycle ;
+ double LSU_duty_cycle ;
+ double MemManU_I_duty_cycle;
+ double MemManU_D_duty_cycle ;
+ double ALU_duty_cycle ;
+ double MUL_duty_cycle ;
+ double FPU_duty_cycle ;
+ double ALU_cdb_duty_cycle ;
+ double MUL_cdb_duty_cycle ;
+ double FPU_cdb_duty_cycle ;
+
+ //all subnodes at the level of system.core(0-n)
+ predictor_systemcore predictor;
+ itlb_systemcore itlb;
+ icache_systemcore icache;
+ dtlb_systemcore dtlb;
+ dcache_systemcore dcache;
+ BTB_systemcore BTB;
+
+} system_core;
+typedef struct{
+ //params
+ int Directory_type;
+ double Dir_config[20];
+ int buffer_sizes[20];
+ int clockrate;
+ int ports[20];
+ int device_type;
+ int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
+ char threeD_stack[20];
+ //stats
+ double total_accesses;
+ double read_accesses;
+ double write_accesses;
+ double read_misses;
+ double write_misses;
+ double conflicts;
+ double duty_cycle;
+} system_L1Directory;
+typedef struct{
+ //params
+ int Directory_type;
+ double Dir_config[20];
+ int buffer_sizes[20];
+ int clockrate;
+ int ports[20];
+ int device_type;
+ int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
+ char threeD_stack[20];
+ //stats
+ double total_accesses;
+ double read_accesses;
+ double write_accesses;
+ double read_misses;
+ double write_misses;
+ double conflicts;
+ double duty_cycle;
+} system_L2Directory;
+typedef struct{
+ //params
+ double L2_config[20];
+ int clockrate;
+ int ports[20];
+ int device_type;
+ int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
+ char threeD_stack[20];
+ int buffer_sizes[20];
+ //stats
+ double total_accesses;
+ double read_accesses;
+ double write_accesses;
+ double total_hits;
+ double total_misses;
+ double read_hits;
+ double write_hits;
+ double read_misses;
+ double write_misses;
+ double replacements;
+ double write_backs;
+ double miss_buffer_accesses;
+ double fill_buffer_accesses;
+ double prefetch_buffer_accesses;
+ double prefetch_buffer_writes;
+ double prefetch_buffer_reads;
+ double prefetch_buffer_hits;
+ double wbb_writes;
+ double wbb_reads;
+ double conflicts;
+ double duty_cycle;
+
+ bool merged_dir;
+ double homenode_read_accesses;
+ double homenode_write_accesses;
+ double homenode_read_hits;
+ double homenode_write_hits;
+ double homenode_read_misses;
+ double homenode_write_misses;
+ double dir_duty_cycle;
+} system_L2;
+typedef struct{
+ //params
+ double L3_config[20];
+ int clockrate;
+ int ports[20];
+ int device_type;
+ int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
+ char threeD_stack[20];
+ int buffer_sizes[20];
+ //stats
+ double total_accesses;
+ double read_accesses;
+ double write_accesses;
+ double total_hits;
+ double total_misses;
+ double read_hits;
+ double write_hits;
+ double read_misses;
+ double write_misses;
+ double replacements;
+ double write_backs;
+ double miss_buffer_accesses;
+ double fill_buffer_accesses;
+ double prefetch_buffer_accesses;
+ double prefetch_buffer_writes;
+ double prefetch_buffer_reads;
+ double prefetch_buffer_hits;
+ double wbb_writes;
+ double wbb_reads;
+ double conflicts;
+ double duty_cycle;
+
+ bool merged_dir;
+ double homenode_read_accesses;
+ double homenode_write_accesses;
+ double homenode_read_hits;
+ double homenode_write_hits;
+ double homenode_read_misses;
+ double homenode_write_misses;
+ double dir_duty_cycle;
+} system_L3;
+typedef struct{
+ //params
+ int number_of_inputs_of_crossbars;
+ int number_of_outputs_of_crossbars;
+ int flit_bits;
+ int input_buffer_entries_per_port;
+ int ports_of_input_buffer[20];
+ //stats
+ double crossbar_accesses;
+} xbar0_systemNoC;
+typedef struct{
+ //params
+ int clockrate;
+ bool type;
+ bool has_global_link;
+ char topology[20];
+ int horizontal_nodes;
+ int vertical_nodes;
+ int link_throughput;
+ int link_latency;
+ int input_ports;
+ int output_ports;
+ int virtual_channel_per_port;
+ int flit_bits;
+ int input_buffer_entries_per_vc;
+ int ports_of_input_buffer[20];
+ int dual_pump;
+ int number_of_crossbars;
+ char crossbar_type[20];
+ char crosspoint_type[20];
+ xbar0_systemNoC xbar0;
+ int arbiter_type;
+ double chip_coverage;
+ //stats
+ double total_accesses;
+ double duty_cycle;
+ double route_over_perc;
+} system_NoC;
+typedef struct{
+ //params
+ int mem_tech_node;
+ int device_clock;
+ int peak_transfer_rate;
+ int internal_prefetch_of_DRAM_chip;
+ int capacity_per_channel;
+ int number_ranks;
+ int num_banks_of_DRAM_chip;
+ int Block_width_of_DRAM_chip;
+ int output_width_of_DRAM_chip;
+ int page_size_of_DRAM_chip;
+ int burstlength_of_DRAM_chip;
+ //stats
+ double memory_accesses;
+ double memory_reads;
+ double memory_writes;
+} system_mem;
+typedef struct{
+ //params
+ //Common Param for mc and fc
+ double peak_transfer_rate;
+ int number_mcs;
+ bool withPHY;
+ int type;
+
+ //FCParam
+ //stats
+ double duty_cycle;
+ double total_load_perc;
+
+ //McParam
+ int mc_clock;
+ int llc_line_length;
+ int memory_channels_per_mc;
+ int number_ranks;
+ int req_window_size_per_channel;
+ int IO_buffer_size_per_channel;
+ int databus_width;
+ int addressbus_width;
+ bool LVDS;
+
+ //stats
+ double memory_accesses;
+ double memory_reads;
+ double memory_writes;
+} system_mc;
+
+typedef struct{
+ //params
+ int clockrate;
+ int number_units;
+ int type;
+ //stats
+ double duty_cycle;
+ double total_load_perc;
+} system_niu;
+
+typedef struct{
+ //params
+ int clockrate;
+ int number_units;
+ int num_channels;
+ int type;
+ bool withPHY;
+ //stats
+ double duty_cycle;
+ double total_load_perc;
+} system_pcie;
+
+typedef struct{
+ //All number_of_* at the level of 'system' Ying 03/21/2009
+ int number_of_cores;
+ int number_of_L1Directories;
+ int number_of_L2Directories;
+ int number_of_L2s;
+ bool Private_L2;
+ int number_of_L3s;
+ int number_of_NoCs;
+ int number_of_dir_levels;
+ int domain_size;
+ int first_level_dir;
+ // All params at the level of 'system'
+ int homogeneous_cores;
+ int homogeneous_L1Directories;
+ int homogeneous_L2Directories;
+ double core_tech_node;
+ int target_core_clockrate;
+ int target_chip_area;
+ int temperature;
+ int number_cache_levels;
+ int L1_property;
+ int L2_property;
+ int homogeneous_L2s;
+ int L3_property;
+ int homogeneous_L3s;
+ int homogeneous_NoCs;
+ int homogeneous_ccs;
+ int Max_area_deviation;
+ int Max_power_deviation;
+ int device_type;
+ bool longer_channel_device;
+ bool Embedded;
+ bool opt_dynamic_power;
+ bool opt_lakage_power;
+ bool opt_clockrate;
+ bool opt_area;
+ int interconnect_projection_type;
+ int machine_bits;
+ int virtual_address_width;
+ int physical_address_width;
+ int virtual_memory_page_size;
+ double total_cycles;
+ //system.core(0-n):3rd level
+ system_core core[64];
+ system_L1Directory L1Directory[64];
+ system_L2Directory L2Directory[64];
+ system_L2 L2[64];
+ system_L3 L3[64];
+ system_NoC NoC[64];
+ system_mem mem;
+ system_mc mc;
+ system_mc flashc;
+ system_niu niu;
+ system_pcie pcie;
+} root_system;
+
+class ParseXML
+{
+public:
+ void parse(char* filepath);
+ void initialize();
+public:
+ root_system sys;
+};
+
+
+#endif /* XML_PARSE_H_ */
+
+
+
+
diff --git a/ext/mcpat/Xeon.xml b/ext/mcpat/Xeon.xml
new file mode 100644
index 000000000..534210485
--- /dev/null
+++ b/ext/mcpat/Xeon.xml
@@ -0,0 +1,455 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mcpat/arch_const.h b/ext/mcpat/arch_const.h
new file mode 100644
index 000000000..b0dfeaa39
--- /dev/null
+++ b/ext/mcpat/arch_const.h
@@ -0,0 +1,276 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#ifndef ARCH_CONST_H_
+#define ARCH_CONST_H_
+
+typedef struct{
+ unsigned int capacity;
+ unsigned int assoc;//fully
+ unsigned int blocksize;
+} array_inputs;
+
+//Do Not change, unless you want to bypass the XML interface and do not care about the default values.
+//Global parameters
+const int number_of_cores = 8;
+const int number_of_L2s = 1;
+const int number_of_L3s = 1;
+const int number_of_NoCs = 1;
+
+const double archi_F_sz_nm = 90.0;
+const unsigned int dev_type = 0;
+const double CLOCKRATE = 1.2*1e9;
+const double AF = 0.5;
+//const bool inorder = true;
+const bool embedded = false; //NEW
+
+const bool homogeneous_cores = true;
+const bool temperature = 360;
+const int number_cache_levels = 3;
+const int L1_property = 0; //private 0; coherent 1, shared 2.
+const int L2_property = 2;
+const bool homogeneous_L2s = true;
+const bool L3_property = 2;
+const bool homogeneous_L3s = true;
+const double Max_area_deviation = 50;
+const double Max_dynamic_deviation =50; //New
+const int opt_dynamic_power = 1;
+const int opt_lakage_power = 0;
+const int opt_area = 0;
+const int interconnect_projection_type = 0;
+
+//******************************Core Parameters
+#if (inorder)
+const int opcode_length = 8;//Niagara
+const int reg_length = 5;//Niagara
+const int instruction_length = 32;//Niagara
+const int data_width = 64;
+#else
+const int opcode_length = 8;//16;//Niagara
+const int reg_length = 7;//Niagara
+const int instruction_length = 32;//Niagara
+const int data_width = 64;
+#endif
+
+
+//Caches
+//itlb
+const int itlbsize=512;
+const int itlbassoc=0;//fully
+const int itlbblocksize=8;
+//icache
+const int icachesize=32768;
+const int icacheassoc=4;
+const int icacheblocksize=32;
+//dtlb
+const int dtlbsize=512;
+const int dtlbassoc=0;//fully
+const int dtlbblocksize=8;
+//dcache
+const int dcachesize=32768;
+const int dcacheassoc=4;
+const int dcacheblocksize=32;
+const int dcache_write_buffers=8;
+
+//cache controllers
+//IB,
+const int numIBEntries = 64;
+const int IBsize = 64;//2*4*instruction_length/8*2;
+const int IBassoc = 0;//In Niagara it is still fully associ
+const int IBblocksize = 4;
+
+//IFB and MIL should have the same parameters CAM
+const int IFBsize=128;//
+const int IFBassoc=0;//In Niagara it is still fully associ
+const int IFBblocksize=4;
+
+
+
+
+const int icache_write_buffers=8;
+
+//register file RAM
+const int regfilesize=5760;
+const int regfileassoc=1;
+const int regfileblocksize=18;
+//regwin RAM
+const int regwinsize=256;
+const int regwinassoc=1;
+const int regwinblocksize=8;
+
+
+
+//store buffer, lsq
+const int lsqsize=512;
+const int lsqassoc=0;
+const int lsqblocksize=8;
+
+//data fill queue RAM
+const int dfqsize=1024;
+const int dfqassoc=1;
+const int dfqblocksize=16;
+
+//outside the cores
+//L2 cache bank
+const int l2cachesize=262144;
+const int l2cacheassoc=16;
+const int l2cacheblocksize=64;
+
+//L2 directory
+const int l2dirsize=1024;
+const int l2dirassoc=0;
+const int l2dirblocksize=2;
+
+//crossbar
+//PCX
+const int PCX_NUMBER_INPUT_PORTS_CROSSBAR = 8;
+const int PCX_NUMBER_OUTPUT_PORTS_CROSSBAR = 9;
+const int PCX_NUMBER_SIGNALS_PER_PORT_CROSSBAR =144;
+//PCX buffer RAM
+const int pcx_buffersize=1024;
+const int pcx_bufferassoc=1;
+const int pcx_bufferblocksize=32;
+const int pcx_numbuffer=5;
+//pcx arbiter
+const int pcx_arbsize=128;
+const int pcx_arbassoc=1;
+const int pcx_arbblocksize=2;
+const int pcx_numarb=5;
+
+//CPX
+const int CPX_NUMBER_INPUT_PORTS_CROSSBAR = 5;
+const int CPX_NUMBER_OUTPUT_PORTS_CROSSBAR = 8;
+const int CPX_NUMBER_SIGNALS_PER_PORT_CROSSBAR =150;
+//CPX buffer RAM
+const int cpx_buffersize=1024;
+const int cpx_bufferassoc=1;
+const int cpx_bufferblocksize=32;
+const int cpx_numbuffer=8;
+//cpx arbiter
+const int cpx_arbsize=128;
+const int cpx_arbassoc=1;
+const int cpx_arbblocksize=2;
+const int cpx_numarb=8;
+
+
+
+
+
+const int numPhysFloatRegs=256;
+const int numPhysIntRegs=32;
+const int numROBEntries=192;
+const int umRobs=1;
+
+const int BTBEntries=4096;
+const int BTBTagSize=16;
+const int LFSTSize=1024;
+const int LQEntries=32;
+const int RASSize=16;
+const int SQEntries=32;
+const int SSITSize=1024;
+const int activity=0;
+const int backComSize=5;
+const int cachePorts=200;
+const int choiceCtrBits=2;
+const int choicePredictorSize=8192;
+
+
+const int commitWidth=8;
+const int decodeWidth=8;
+const int dispatchWidth=8;
+const int fetchWidth=8;
+const int issueWidth=1;
+const int renameWidth=8;
+//what is this forwardComSize=5??
+
+const int globalCtrBits=2;
+const int globalHistoryBits=13;
+const int globalPredictorSize=8192;
+
+
+
+const int localCtrBits=2;
+const int localHistoryBits=11;
+const int localHistoryTableSize=2048;
+const int localPredictorSize=2048;
+
+const double Woutdrvnandn =30 *0.09;//(24.0 * LSCALE)
+const double Woutdrvnandp =12.5 *0.09;//(10.0 * LSCALE)
+const double Woutdrvnorn =7.5*0.09;//(6.0 * LSCALE)
+const double Woutdrvnorp =50 * 0.09;// (40.0 * LSCALE)
+const double Woutdrivern =60*0.09;//(48.0 * LSCALE)
+const double Woutdriverp =100 * 0.09;//(80.0 * LSCALE)
+
+/*
+smtCommitPolicy=RoundRobin
+smtFetchPolicy=SingleThread
+smtIQPolicy=Partitioned
+smtIQThreshold=100
+smtLSQPolicy=Partitioned
+smtLSQThreshold=100
+smtNumFetchingThreads=1
+smtROBPolicy=Partitioned
+smtROBThreshold=100
+squashWidth=8
+*/
+
+/*
+prefetch_access=false
+prefetch_cache_check_push=true
+prefetch_data_accesses_only=false
+prefetch_degree=1
+prefetch_latency=10000
+prefetch_miss=false
+prefetch_past_page=false
+prefetch_policy=none
+prefetch_serial_squash=false
+prefetch_use_cpu_id=true
+prefetcher_size=100
+prioritizeRequests=false
+repl=Null
+
+
+split=false
+split_size=0
+subblock_size=0
+tgts_per_mshr=20
+trace_addr=0
+two_queue=false
+
+cpu_side=system.cpu0.dcache_port
+mem_side=system.tol2bus.port[2]
+*/
+
+//[system.cpu0.dtb]
+//type=AlphaDT
+
+
+#endif /* ARCH_CONST_H_ */
diff --git a/ext/mcpat/array.cc b/ext/mcpat/array.cc
new file mode 100644
index 000000000..975f82fad
--- /dev/null
+++ b/ext/mcpat/array.cc
@@ -0,0 +1,302 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#define GLOBALVAR
+#include
+#include
+#include
+
+#include "area.h"
+#include "array.h"
+#include "decoder.h"
+#include "globalvar.h"
+#include "parameter.h"
+
+using namespace std;
+
+ArrayST::ArrayST(const InputParameter *configure_interface,
+ string _name,
+ enum Device_ty device_ty_,
+ bool opt_local_,
+ enum Core_type core_ty_,
+ bool _is_default)
+:l_ip(*configure_interface),
+ name(_name),
+ device_ty(device_ty_),
+ opt_local(opt_local_),
+ core_ty(core_ty_),
+ is_default(_is_default)
+ {
+
+ if (l_ip.cache_sz<64) l_ip.cache_sz=64;
+ l_ip.error_checking();//not only do the error checking but also fill some missing parameters
+ optimize_array();
+
+}
+
+
+void ArrayST::compute_base_power()
+ {
+ //l_ip.out_w =l_ip.line_sz*8;
+ local_result=cacti_interface(&l_ip);
+
+ }
+
+void ArrayST::optimize_array()
+{
+ list candidate_solutions(0);
+ list::iterator candidate_iter, min_dynamic_energy_iter;
+
+ uca_org_t * temp_res = 0;
+ local_result.valid=false;
+
+ double throughput=l_ip.throughput, latency=l_ip.latency;
+ double area_efficiency_threshold = 20.0;
+ bool throughput_overflow=true, latency_overflow=true;
+ compute_base_power();
+
+ if ((local_result.cycle_time - throughput) <= 1e-10 )
+ throughput_overflow=false;
+ if ((local_result.access_time - latency)<= 1e-10)
+ latency_overflow=false;
+
+ if (opt_for_clk && opt_local)
+ {
+ if (throughput_overflow || latency_overflow)
+ {
+ l_ip.ed=0;
+
+ l_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied.
+ l_ip.cycle_time_wt = 1000;
+
+ l_ip.area_wt = 10;//Fixed number, This is used to exhaustive search for individual components.
+ l_ip.dynamic_power_wt = 10;//Fixed number, This is used to exhaustive search for individual components.
+ l_ip.leakage_power_wt = 10;
+
+ l_ip.delay_dev = 1000000;//Fixed number, make sure timing can be satisfied.
+ l_ip.cycle_time_dev = 100;
+
+ l_ip.area_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components.
+ l_ip.dynamic_power_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components.
+ l_ip.leakage_power_dev = 1000000;
+
+ throughput_overflow=true; //Reset overflow flag before start optimization iterations
+ latency_overflow=true;
+
+ temp_res = &local_result; //Clean up the result for optimized for ED^2P
+ temp_res->cleanup();
+ }
+
+
+ while ((throughput_overflow || latency_overflow)&&l_ip.cycle_time_dev > 10)// && l_ip.delay_dev > 10
+ {
+ compute_base_power();
+
+ l_ip.cycle_time_dev-=10;//This is the time_dev to be used for next iteration
+
+ // from best area to worst area -->worst timing to best timing
+ if ((((local_result.cycle_time - throughput) <= 1e-10 ) && (local_result.access_time - latency)<= 1e-10)||
+ (local_result.data_array2->area_efficiency < area_efficiency_threshold && l_ip.assoc == 0))
+ { //if no satisfiable solution is found,the most aggressive one is left
+ candidate_solutions.push_back(local_result);
+ //output_data_csv(candidate_solutions.back());
+ if (((local_result.cycle_time - throughput) <= 1e-10) && ((local_result.access_time - latency)<= 1e-10))
+ //ensure stop opt not because of cam
+ {
+ throughput_overflow=false;
+ latency_overflow=false;
+ }
+
+ }
+ else
+ {
+ //TODO: whether checking the partial satisfied results too, or just change the mark???
+ if ((local_result.cycle_time - throughput) <= 1e-10)
+ throughput_overflow=false;
+ if ((local_result.access_time - latency)<= 1e-10)
+ latency_overflow=false;
+
+ if (l_ip.cycle_time_dev > 10)
+ { //if not >10 local_result is the last result, it cannot be cleaned up
+ temp_res = &local_result; //Only solutions not saved in the list need to be cleaned up
+ temp_res->cleanup();
+ }
+ }
+// l_ip.cycle_time_dev-=10;
+// l_ip.delay_dev-=10;
+
+ }
+
+
+ if (l_ip.assoc > 0)
+ {
+ //For array structures except CAM and FA, Give warning but still provide a result with best timing found
+ if (throughput_overflow==true)
+ cout<< "Warning: " << name<<" array structure cannot satisfy throughput constraint." << endl;
+ if (latency_overflow==true)
+ cout<< "Warning: " << name<<" array structure cannot satisfy latency constraint." << endl;
+ }
+
+// else
+// {
+// /*According to "Content-Addressable Memory (CAM) Circuits and
+// Architectures": A Tutorial and Survey
+// by Kostas Pagiamtzis et al.
+// CAM structures can be heavily pipelined and use look-ahead techniques,
+// therefore timing can be relaxed. But McPAT does not model the advanced
+// techniques. If continue optimizing, the area efficiency will be too low
+// */
+// //For CAM and FA, stop opt if area efficiency is too low
+// if (throughput_overflow==true)
+// cout<< "Warning: " <<" McPAT stopped optimization on throughput for "<< name
+// <<" array structure because its area efficiency is below "< (candidate_iter)->power.readOp.dynamic)
+ {
+ min_dynamic_energy = (candidate_iter)->power.readOp.dynamic;
+ min_dynamic_energy_iter = candidate_iter;
+ local_result = *(min_dynamic_energy_iter);
+ //TODO: since results are reordered results and l_ip may miss match. Therefore, the final output spread sheets may show the miss match.
+
+ }
+ else
+ {
+ candidate_iter->cleanup() ;
+ }
+
+ }
+
+
+ }
+ candidate_solutions.clear();
+ }
+
+ double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
+
+ double macro_layout_overhead = g_tp.macro_layout_overhead;
+ double chip_PR_overhead = g_tp.chip_layout_overhead;
+ double total_overhead = macro_layout_overhead*chip_PR_overhead;
+ local_result.area *= total_overhead;
+
+ //maintain constant power density
+ double pppm_t[4] = {total_overhead,1,1,total_overhead};
+
+ double sckRation = g_tp.sckt_co_eff;
+ local_result.power.readOp.dynamic *= sckRation;
+ local_result.power.writeOp.dynamic *= sckRation;
+ local_result.power.searchOp.dynamic *= sckRation;
+ local_result.power.readOp.leakage *= l_ip.nbanks;
+ local_result.power.readOp.longer_channel_leakage =
+ local_result.power.readOp.leakage*long_channel_device_reduction;
+ local_result.power = local_result.power* pppm_t;
+
+ local_result.data_array2->power.readOp.dynamic *= sckRation;
+ local_result.data_array2->power.writeOp.dynamic *= sckRation;
+ local_result.data_array2->power.searchOp.dynamic *= sckRation;
+ local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
+ local_result.data_array2->power.readOp.longer_channel_leakage =
+ local_result.data_array2->power.readOp.leakage*long_channel_device_reduction;
+ local_result.data_array2->power = local_result.data_array2->power* pppm_t;
+
+
+ if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache)
+ {
+ local_result.tag_array2->power.readOp.dynamic *= sckRation;
+ local_result.tag_array2->power.writeOp.dynamic *= sckRation;
+ local_result.tag_array2->power.searchOp.dynamic *= sckRation;
+ local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
+ local_result.tag_array2->power.readOp.longer_channel_leakage =
+ local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction;
+ local_result.tag_array2->power = local_result.tag_array2->power* pppm_t;
+ }
+
+
+}
+
+void ArrayST::leakage_feedback(double temperature)
+{
+ // Update the temperature. l_ip is already set and error-checked in the creator function.
+ l_ip.temp = (unsigned int)round(temperature/10.0)*10;
+
+ // This corresponds to cacti_interface() in the initialization process. Leakage power is updated here.
+ reconfigure(&l_ip,&local_result);
+
+ // Scale the power values. This is part of ArrayST::optimize_array().
+ double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
+
+ double macro_layout_overhead = g_tp.macro_layout_overhead;
+ double chip_PR_overhead = g_tp.chip_layout_overhead;
+ double total_overhead = macro_layout_overhead*chip_PR_overhead;
+
+ double pppm_t[4] = {total_overhead,1,1,total_overhead};
+
+ double sckRation = g_tp.sckt_co_eff;
+ local_result.power.readOp.dynamic *= sckRation;
+ local_result.power.writeOp.dynamic *= sckRation;
+ local_result.power.searchOp.dynamic *= sckRation;
+ local_result.power.readOp.leakage *= l_ip.nbanks;
+ local_result.power.readOp.longer_channel_leakage = local_result.power.readOp.leakage*long_channel_device_reduction;
+ local_result.power = local_result.power* pppm_t;
+
+ local_result.data_array2->power.readOp.dynamic *= sckRation;
+ local_result.data_array2->power.writeOp.dynamic *= sckRation;
+ local_result.data_array2->power.searchOp.dynamic *= sckRation;
+ local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
+ local_result.data_array2->power.readOp.longer_channel_leakage = local_result.data_array2->power.readOp.leakage*long_channel_device_reduction;
+ local_result.data_array2->power = local_result.data_array2->power* pppm_t;
+
+ if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache)
+ {
+ local_result.tag_array2->power.readOp.dynamic *= sckRation;
+ local_result.tag_array2->power.writeOp.dynamic *= sckRation;
+ local_result.tag_array2->power.searchOp.dynamic *= sckRation;
+ local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
+ local_result.tag_array2->power.readOp.longer_channel_leakage = local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction;
+ local_result.tag_array2->power = local_result.tag_array2->power* pppm_t;
+ }
+}
+
+ArrayST:: ~ArrayST()
+{
+ local_result.cleanup();
+}
diff --git a/ext/mcpat/array.h b/ext/mcpat/array.h
new file mode 100644
index 000000000..8c6124d46
--- /dev/null
+++ b/ext/mcpat/array.h
@@ -0,0 +1,101 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#ifndef ARRAY_H_
+#define ARRAY_H_
+
+#include
+#include
+
+#include "basic_components.h"
+#include "cacti_interface.h"
+#include "component.h"
+#include "const.h"
+#include "parameter.h"
+
+using namespace std;
+
+class ArrayST :public Component{
+ public:
+ ArrayST(){};
+ ArrayST(const InputParameter *configure_interface, string _name, enum Device_ty device_ty_, bool opt_local_=true, enum Core_type core_ty_=Inorder, bool _is_default=true);
+
+ InputParameter l_ip;
+ string name;
+ enum Device_ty device_ty;
+ bool opt_local;
+ enum Core_type core_ty;
+ bool is_default;
+ uca_org_t local_result;
+
+ statsDef tdp_stats;
+ statsDef rtp_stats;
+ statsDef stats_t;
+ powerDef power_t;
+
+ virtual void optimize_array();
+ virtual void compute_base_power();
+ virtual ~ArrayST();
+
+ void leakage_feedback(double temperature);
+};
+
+class InstCache :public Component{
+public:
+ ArrayST* caches;
+ ArrayST* missb;
+ ArrayST* ifb;
+ ArrayST* prefetchb;
+ powerDef power_t;//temp value holder for both (max) power and runtime power
+ InstCache(){caches=0;missb=0;ifb=0;prefetchb=0;};
+ ~InstCache(){
+ if (caches) {//caches->local_result.cleanup();
+ delete caches; caches=0;}
+ if (missb) {//missb->local_result.cleanup();
+ delete missb; missb=0;}
+ if (ifb) {//ifb->local_result.cleanup();
+ delete ifb; ifb=0;}
+ if (prefetchb) {//prefetchb->local_result.cleanup();
+ delete prefetchb; prefetchb=0;}
+ };
+};
+
+class DataCache :public InstCache{
+public:
+ ArrayST* wbb;
+ DataCache(){wbb=0;};
+ ~DataCache(){
+ if (wbb) {//wbb->local_result.cleanup();
+ delete wbb; wbb=0;}
+ };
+};
+
+#endif /* TLB_H_ */
diff --git a/ext/mcpat/basic_components.cc b/ext/mcpat/basic_components.cc
new file mode 100644
index 000000000..f288d7479
--- /dev/null
+++ b/ext/mcpat/basic_components.cc
@@ -0,0 +1,127 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#include
+#include
+#include
+
+#include "basic_components.h"
+
+double longer_channel_device_reduction(
+ enum Device_ty device_ty,
+ enum Core_type core_ty)
+{
+
+ double longer_channel_device_percentage_core;
+ double longer_channel_device_percentage_uncore;
+ double longer_channel_device_percentage_llc;
+
+ double long_channel_device_reduction;
+
+ longer_channel_device_percentage_llc = 1.0;
+ longer_channel_device_percentage_uncore = 0.82;
+ if (core_ty==OOO)
+ {
+ longer_channel_device_percentage_core = 0.56;//0.54 Xeon Tulsa //0.58 Nehelam
+ //longer_channel_device_percentage_uncore = 0.76;//0.85 Nehelam
+
+ }
+ else
+ {
+ longer_channel_device_percentage_core = 0.8;//0.8;//Niagara
+ //longer_channel_device_percentage_uncore = 0.9;//Niagara
+ }
+
+ if (device_ty==Core_device)
+ {
+ long_channel_device_reduction = (1- longer_channel_device_percentage_core)
+ + longer_channel_device_percentage_core * g_tp.peri_global.long_channel_leakage_reduction;
+ }
+ else if (device_ty==Uncore_device)
+ {
+ long_channel_device_reduction = (1- longer_channel_device_percentage_uncore)
+ + longer_channel_device_percentage_uncore * g_tp.peri_global.long_channel_leakage_reduction;
+ }
+ else if (device_ty==LLC_device)
+ {
+ long_channel_device_reduction = (1- longer_channel_device_percentage_llc)
+ + longer_channel_device_percentage_llc * g_tp.peri_global.long_channel_leakage_reduction;
+ }
+ else
+ {
+ cout<<"unknown device category"<
+
+#include "XML_Parse.h"
+#include "parameter.h"
+
+const double cdb_overhead = 1.1;
+
+enum FU_type {
+ FPU,
+ ALU,
+ MUL
+};
+
+enum Core_type {
+ OOO,
+ Inorder
+};
+
+enum Renaming_type {
+ RAMbased,
+ CAMbased
+};
+
+enum Scheduler_type {
+ PhysicalRegFile,
+ ReservationStation
+};
+
+enum cache_level {
+ L2,
+ L3,
+ L1Directory,
+ L2Directory
+};
+
+enum MemoryCtrl_type {
+ MC, //memory controller
+ FLASHC //flash controller
+};
+
+enum Dir_type {
+ ST,//shadowed tag
+ DC,//directory cache
+ SBT,//static bank tag
+ NonDir
+
+};
+
+enum Cache_policy {
+ Write_through,
+ Write_back
+};
+
+enum Device_ty {
+ Core_device,
+ Uncore_device,
+ LLC_device
+};
+
+class statsComponents
+{
+ public:
+ double access;
+ double hit;
+ double miss;
+
+ statsComponents() : access(0), hit(0), miss(0) {}
+ statsComponents(const statsComponents & obj) { *this = obj; }
+ statsComponents & operator=(const statsComponents & rhs)
+ {
+ access = rhs.access;
+ hit = rhs.hit;
+ miss = rhs.miss;
+ return *this;
+ }
+ void reset() { access = 0; hit = 0; miss = 0;}
+
+ friend statsComponents operator+(const statsComponents & x, const statsComponents & y);
+ friend statsComponents operator*(const statsComponents & x, double const * const y);
+};
+
+class statsDef
+{
+ public:
+ statsComponents readAc;
+ statsComponents writeAc;
+ statsComponents searchAc;
+
+ statsDef() : readAc(), writeAc(),searchAc() { }
+ void reset() { readAc.reset(); writeAc.reset();searchAc.reset();}
+
+ friend statsDef operator+(const statsDef & x, const statsDef & y);
+ friend statsDef operator*(const statsDef & x, double const * const y);
+};
+
+double longer_channel_device_reduction(
+ enum Device_ty device_ty=Core_device,
+ enum Core_type core_ty=Inorder);
+
+class CoreDynParam {
+public:
+ CoreDynParam(){};
+ CoreDynParam(ParseXML *XML_interface, int ithCore_);
+ // :XML(XML_interface),
+ // ithCore(ithCore_)
+ // core_ty(inorder),
+ // rm_ty(CAMbased),
+ // scheu_ty(PhysicalRegFile),
+ // clockRate(1e9),//1GHz
+ // arch_ireg_width(32),
+ // arch_freg_width(32),
+ // phy_ireg_width(128),
+ // phy_freg_width(128),
+ // perThreadState(8),
+ // globalCheckpoint(32),
+ // instructionLength(32){};
+ //ParseXML * XML;
+ bool opt_local;
+ bool x86;
+ bool Embedded;
+ enum Core_type core_ty;
+ enum Renaming_type rm_ty;
+ enum Scheduler_type scheu_ty;
+ double clockRate,executionTime;
+ int arch_ireg_width, arch_freg_width, phy_ireg_width, phy_freg_width;
+ int num_IRF_entry, num_FRF_entry, num_ifreelist_entries, num_ffreelist_entries;
+ int fetchW, decodeW,issueW,peak_issueW, commitW,peak_commitW, predictionW, fp_issueW, fp_decodeW;
+ int perThreadState, globalCheckpoint, instruction_length, pc_width, opcode_length, micro_opcode_length;
+ int num_hthreads, pipeline_stages, fp_pipeline_stages, num_pipelines, num_fp_pipelines;
+ int num_alus, num_muls;
+ double num_fpus;
+ int int_data_width, fp_data_width,v_address_width, p_address_width;
+ double pipeline_duty_cycle, total_cycles, busy_cycles, idle_cycles;
+ bool regWindowing,multithreaded;
+ double pppm_lkg_multhread[4];
+ double IFU_duty_cycle,BR_duty_cycle,LSU_duty_cycle,MemManU_I_duty_cycle,
+ MemManU_D_duty_cycle, ALU_duty_cycle,MUL_duty_cycle,
+ FPU_duty_cycle, ALU_cdb_duty_cycle,MUL_cdb_duty_cycle,
+ FPU_cdb_duty_cycle;
+ ~CoreDynParam(){};
+};
+
+class CacheDynParam {
+public:
+ CacheDynParam(){};
+ CacheDynParam(ParseXML *XML_interface, int ithCache_);
+ string name;
+ enum Dir_type dir_ty;
+ double clockRate,executionTime;
+ double capacity, blockW, assoc, nbanks;
+ double throughput, latency;
+ double duty_cycle, dir_duty_cycle;
+ //double duty_cycle;
+ int missb_size, fu_size, prefetchb_size, wbb_size;
+ ~CacheDynParam(){};
+};
+
+class MCParam {
+public:
+ MCParam(){};
+ MCParam(ParseXML *XML_interface, int ithCache_);
+ string name;
+ double clockRate,num_mcs, peakDataTransferRate, num_channels;
+ // double mcTEPowerperGhz;
+ // double mcPHYperGbit;
+ // double area;
+ int llcBlockSize, dataBusWidth, addressBusWidth;
+ int opcodeW;
+ int memAccesses;
+ int memRank;
+ int type;
+ double frontend_duty_cycle, duty_cycle, perc_load;
+ double executionTime, reads, writes;
+ bool LVDS, withPHY;
+
+ ~MCParam(){};
+};
+
+class NoCParam {
+public:
+ NoCParam(){};
+ NoCParam(ParseXML *XML_interface, int ithCache_);
+ string name;
+ double clockRate;
+ int flit_size;
+ int input_ports, output_ports, min_ports, global_linked_ports;
+ int virtual_channel_per_port,input_buffer_entries_per_vc;
+ int horizontal_nodes,vertical_nodes, total_nodes;
+ double executionTime, total_access, link_throughput,link_latency,
+ duty_cycle, chip_coverage, route_over_perc;
+ bool has_global_link, type;
+
+ ~NoCParam(){};
+};
+
+class ProcParam {
+public:
+ ProcParam(){};
+ ProcParam(ParseXML *XML_interface, int ithCache_);
+ string name;
+ int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir,numMC, numMCChannel;
+ bool homoCore, homoL2, homoL3, homoNOC, homoL1Dir, homoL2Dir;
+
+ ~ProcParam(){};
+};
+
+class NIUParam {
+public:
+ NIUParam(){};
+ NIUParam(ParseXML *XML_interface, int ithCache_);
+ string name;
+ double clockRate;
+ int num_units;
+ int type;
+ double duty_cycle, perc_load;
+ ~NIUParam(){};
+};
+
+class PCIeParam {
+public:
+ PCIeParam(){};
+ PCIeParam(ParseXML *XML_interface, int ithCache_);
+ string name;
+ double clockRate;
+ int num_channels, num_units;
+ bool withPHY;
+ int type;
+ double duty_cycle, perc_load;
+ ~PCIeParam(){};
+};
+#endif /* BASIC_COMPONENTS_H_ */
diff --git a/ext/mcpat/cacti/README b/ext/mcpat/cacti/README
new file mode 100644
index 000000000..de429d2bb
--- /dev/null
+++ b/ext/mcpat/cacti/README
@@ -0,0 +1,94 @@
+-----------------------------------------------------------
+ ____ _ ____ _____ ___ __ ____
+ / ___| / \ / ___|_ _|_ _| / /_ | ___|
+ | | / _ \| | | | | | | '_ \ |___ \
+ | |___ / ___ \ |___ | | | | | (_) | ___) |
+ \____/_/ \_\____| |_| |___| \___(_)____/
+
+
+ A Tool to Model Caches/Memories
+-----------------------------------------------------------
+
+CACTI is an analytical tool that takes a set of cache/memory para-
+meters as input and calculates its access time, power, cycle
+time, and area.
+CACTI was originally developed by Dr. Jouppi and Dr. Wilton
+in 1993 and since then it has undergone five major
+revisions.
+
+List of features (version 1-6.5):
+===============================
+The following is the list of features supported by the tool.
+
+* Power, delay, area, and cycle time model for
+ direct mapped caches
+ set-associative caches
+ fully associative caches
+ Embedded DRAM memories
+ Commodity DRAM memories
+
+* Support for modeling multi-ported uniform cache access (UCA)
+ and multi-banked, multi-ported non-uniform cache access (NUCA).
+
+* Leakage power calculation that also considers the operating
+ temperature of the cache.
+
+* Router power model.
+
+* Interconnect model with different delay, power, and area
+ properties including low-swing wire model.
+
+* An interface to perform trade-off analysis involving power, delay,
+ area, and bandwidth.
+
+* All process specific values used by the tool are obtained
+ from ITRS and currently, the tool supports 90nm, 65nm, 45nm,
+ and 32nm technology nodes.
+
+Version 6.5 has a new c++ code base and includes numerous bug fixes.
+CACTI 5.3 and 6.0 activate an entire row of mats to read/write a single
+block of data. This technique improves reliability at the cost of
+power. CACTI 6.5 activates minimum number of mats just enough to retrieve
+a block to minimize power.
+
+How to use the tool?
+====================
+Prior versions of CACTI take input parameters such as cache
+size and technology node as a set of command line arguments.
+To avoid a long list of command line arguments,
+CACTI 6.5 lets users specify their cache model in a more
+detailed manner by using a config file (cache.cfg).
+
+-> define the cache model using cache.cfg
+-> run the "cacti" binary <./cacti -infile cache.cfg>
+
+CACTI6.5 also provides a command line interface similar to earlier versions
+of CACTI. The command line interface can be used as
+
+./cacti cache_size line_size associativity rw_ports excl_read_ports excl_write_ports
+ single_ended_read_ports search_ports banks tech_node output_width specific_tag tag_width
+ access_mode cache main_mem obj_func_delay obj_func_dynamic_power obj_func_leakage_power
+ obj_func_cycle_time obj_func_area dev_func_delay dev_func_dynamic_power dev_func_leakage_power
+ dev_func_area dev_func_cycle_time ed_ed2_none temp wt data_arr_ram_cell_tech_flavor_in
+ data_arr_peri_global_tech_flavor_in tag_arr_ram_cell_tech_flavor_in tag_arr_peri_global_tech_flavor_in
+ interconnect_projection_type_in wire_inside_mat_type_in wire_outside_mat_type_in
+ REPEATERS_IN_HTREE_SEGMENTS_in VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in
+ BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in PAGE_SIZE_BITS_in BURST_LENGTH_in
+ INTERNAL_PREFETCH_WIDTH_in force_wiretype wiretype force_config ndwl ndbl nspd ndcm
+ ndsam1 ndsam2 ecc
+
+For complete documentation of the tool, please refer CACTI-5.3 and 6.0
+technical reports and the following paper,
+"Optimizing NUCA Organizations and Wiring Alternatives for
+Large Caches With CACTI 6.0", that appears in MICRO 2007.
+
+We are still improving the tool and refining the code. If you
+have any comments, questions, or suggestions please write to
+us.
+
+Naveen Muralimanohar Jung Ho Ahn Sheng Li
+naveen.muralimanohar@hp.com gajh@snu.ac.kr sheng.li@hp.com
+
+
+
+
diff --git a/ext/mcpat/cacti/Ucache.cc b/ext/mcpat/cacti/Ucache.cc
new file mode 100644
index 000000000..f3e1227df
--- /dev/null
+++ b/ext/mcpat/cacti/Ucache.cc
@@ -0,0 +1,916 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "Ucache.h"
+#include "area.h"
+#include "bank.h"
+#include "basic_circuit.h"
+#include "component.h"
+#include "const.h"
+#include "decoder.h"
+#include "parameter.h"
+#include "subarray.h"
+#include "uca.h"
+
+using namespace std;
+
+const uint32_t nthreads = NTHREADS;
+
+
+void min_values_t::update_min_values(const min_values_t * val)
+{
+ min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay;
+ min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn;
+ min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage;
+ min_area = (min_area > val->min_area) ? val->min_area : min_area;
+ min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc;
+}
+
+
+
+void min_values_t::update_min_values(const uca_org_t & res)
+{
+ min_delay = (min_delay > res.access_time) ? res.access_time : min_delay;
+ min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn;
+ min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage;
+ min_area = (min_area > res.area) ? res.area : min_area;
+ min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc;
+}
+
+void min_values_t::update_min_values(const nuca_org_t * res)
+{
+ min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay;
+ min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn;
+ min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage;
+ min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area;
+ min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc;
+}
+
+void min_values_t::update_min_values(const mem_array * res)
+{
+ min_delay = (min_delay > res->access_time) ? res->access_time : min_delay;
+ min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn;
+ min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage;
+ min_area = (min_area > res->area) ? res->area : min_area;
+ min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc;
+}
+
+
+
+void * calc_time_mt_wrapper(void * void_obj)
+{
+ calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
+ uint32_t tid = calc_obj->tid;
+ list & data_arr = calc_obj->data_arr;
+ list & tag_arr = calc_obj->tag_arr;
+ bool is_tag = calc_obj->is_tag;
+ bool pure_ram = calc_obj->pure_ram;
+ bool pure_cam = calc_obj->pure_cam;
+ bool is_main_mem = calc_obj->is_main_mem;
+ double Nspd_min = calc_obj->Nspd_min;
+ min_values_t * data_res = calc_obj->data_res;
+ min_values_t * tag_res = calc_obj->tag_res;
+
+ data_arr.clear();
+ data_arr.push_back(new mem_array);
+ tag_arr.clear();
+ tag_arr.push_back(new mem_array);
+
+ uint32_t Ndwl_niter = _log2(MAXDATAN) + 1;
+ uint32_t Ndbl_niter = _log2(MAXDATAN) + 1;
+ uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1;
+ uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter;
+
+
+ bool is_valid_partition;
+ int wt_min, wt_max;
+
+ if (g_ip->force_wiretype) {
+ if (g_ip->wt == 0) {
+ wt_min = Low_swing;
+ wt_max = Low_swing;
+ }
+ else {
+ wt_min = Global;
+ wt_max = Low_swing-1;
+ }
+ }
+ else {
+ wt_min = Global;
+ wt_max = Low_swing;
+ }
+
+ for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2)
+ {
+ for (int wr = wt_min; wr <= wt_max; wr++)
+ {
+ for (uint32_t iter = tid; iter < niter; iter += nthreads)
+ {
+ // reconstruct Ndwl, Ndbl, Ndcm
+ unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
+ unsigned int Ndbl = 1 << ((iter / (Ndcm_niter))%Ndbl_niter);
+ unsigned int Ndcm = 1 << (iter % Ndcm_niter);
+ for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2)
+ {
+ for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2)
+ {
+ //for debuging
+ if (g_ip->force_cache_config && is_tag == false)
+ {
+ wr = g_ip->wt;
+ Ndwl = g_ip->ndwl;
+ Ndbl = g_ip->ndbl;
+ Ndcm = g_ip->ndcm;
+ if(g_ip->nspd != 0) {
+ Nspd = g_ip->nspd;
+ }
+ if(g_ip->ndsam1 != 0) {
+ Ndsam_lev_1 = g_ip->ndsam1;
+ Ndsam_lev_2 = g_ip->ndsam2;
+ }
+ }
+
+ if (is_tag == true)
+ {
+ is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
+ Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
+ tag_arr.back(), 0, NULL, NULL,
+ is_main_mem);
+ }
+ // If it's a fully-associative cache, the data array partition parameters are identical to that of
+ // the tag array, so compute data array partition properties also here.
+ if (is_tag == false || g_ip->fully_assoc)
+ {
+ is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl,
+ Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
+ data_arr.back(), 0, NULL, NULL,
+ is_main_mem);
+ }
+
+ if (is_valid_partition)
+ {
+ if (is_tag == true)
+ {
+ tag_arr.back()->wt = (enum Wire_type) wr;
+ tag_res->update_min_values(tag_arr.back());
+ tag_arr.push_back(new mem_array);
+ }
+ if (is_tag == false || g_ip->fully_assoc)
+ {
+ data_arr.back()->wt = (enum Wire_type) wr;
+ data_res->update_min_values(data_arr.back());
+ data_arr.push_back(new mem_array);
+ }
+ }
+
+ if (g_ip->force_cache_config && is_tag == false)
+ {
+ wr = wt_max;
+ iter = niter;
+ if(g_ip->nspd != 0) {
+ Nspd = MAXDATASPD;
+ }
+ if (g_ip->ndsam1 != 0) {
+ Ndsam_lev_1 = MAX_COL_MUX+1;
+ Ndsam_lev_2 = MAX_COL_MUX+1;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ delete data_arr.back();
+ delete tag_arr.back();
+ data_arr.pop_back();
+ tag_arr.pop_back();
+
+ pthread_exit(NULL);
+}
+
+
+
+bool calculate_time(
+ bool is_tag,
+ int pure_ram,
+ bool pure_cam,
+ double Nspd,
+ unsigned int Ndwl,
+ unsigned int Ndbl,
+ unsigned int Ndcm,
+ unsigned int Ndsam_lev_1,
+ unsigned int Ndsam_lev_2,
+ mem_array *ptr_array,
+ int flag_results_populate,
+ results_mem_array *ptr_results,
+ uca_org_t *ptr_fin_res,
+ bool is_main_mem)
+{
+ DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem);
+
+ if (dyn_p.is_valid == false)
+ {
+ return false;
+ }
+
+ UCA * uca = new UCA(dyn_p);
+
+
+ if (flag_results_populate)
+ { //For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables
+ }
+ else
+ {
+ int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
+ int num_mats = uca->bank.dp.num_mats;
+ bool is_fa = uca->bank.dp.fully_assoc;
+ bool pure_cam = uca->bank.dp.pure_cam;
+ ptr_array->Ndwl = Ndwl;
+ ptr_array->Ndbl = Ndbl;
+ ptr_array->Nspd = Nspd;
+ ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing;
+ ptr_array->Ndsam_lev_1 = Ndsam_lev_1;
+ ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
+ ptr_array->access_time = uca->access_time;
+ ptr_array->cycle_time = uca->cycle_time;
+ ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time;
+ ptr_array->area_ram_cells = uca->area_all_dataramcells;
+ ptr_array->area = uca->area.get_area();
+ ptr_array->height = uca->area.h;
+ ptr_array->width = uca->area.w;
+ ptr_array->mat_height = uca->bank.mat.area.h;
+ ptr_array->mat_length = uca->bank.mat.area.w;
+ ptr_array->subarray_height = uca->bank.mat.subarray.area.h;
+ ptr_array->subarray_length = uca->bank.mat.subarray.area.w;
+ ptr_array->power = uca->power;
+ ptr_array->delay_senseamp_mux_decoder =
+ MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
+ uca->delay_array_to_sa_mux_lev_2_decoder);
+ ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver;
+ ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out;
+
+ ptr_array->delay_route_to_bank = uca->htree_in_add->delay;
+ ptr_array->delay_input_htree = uca->bank.htree_in_add->delay;
+ ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay;
+ ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay;
+ ptr_array->delay_bitlines = uca->bank.mat.delay_bitline;
+ ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline;
+ ptr_array->delay_sense_amp = uca->bank.mat.delay_sa;
+ ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree;
+ ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay;
+ ptr_array->delay_comparator = uca->bank.mat.delay_comparator;
+
+ ptr_array->all_banks_height = uca->area.h;
+ ptr_array->all_banks_width = uca->area.w;
+ ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area());
+
+ ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
+ ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
+ ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
+// cout<<"power_data_input_htree"<bank.htree_in_data->power.readOp.leakage<power_data_output_htree = uca->bank.htree_out_data->power;
+// cout<<"power_data_output_htree"<bank.htree_out_data->power.readOp.leakage<power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power;
+ ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power;
+ ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
+ ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power;
+ ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power;
+ ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
+ ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders;
+ ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders;
+ ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_bitlines = uca->bank.mat.power_bitline;
+ ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_sense_amps = uca->bank.mat.power_sa;
+ ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv;
+ ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv;
+ ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir;
+
+ ptr_array->power_comparators = uca->bank.mat.power_comparator;
+ ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
+ ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;
+
+// cout << " num of mats: " << dyn_p.num_mats << endl;
+ if (is_fa || pure_cam)
+ {
+ ptr_array->power_htree_in_search = uca->bank.htree_in_search->power;
+// cout<<"power_htree_in_search"<bank.htree_in_search->power.readOp.leakage<power_htree_out_search = uca->bank.htree_out_search->power;
+// cout<<"power_htree_out_search"<bank.htree_out_search->power.readOp.leakage<power_searchline = uca->bank.mat.power_searchline;
+// cout<<"power_searchlineh"<bank.mat.power_searchline.readOp.leakage<power_searchline.searchOp.dynamic *= num_mats;
+ ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge;
+ ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchlines = uca->bank.mat.power_matchline;
+ ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge;
+ ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
+ ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv;
+// cout<<"power_matchline.searchOp.leakage"<bank.mat.power_matchline.searchOp.leakage<activate_energy = uca->activate_energy;
+ ptr_array->read_energy = uca->read_energy;
+ ptr_array->write_energy = uca->write_energy;
+ ptr_array->precharge_energy = uca->precharge_energy;
+ ptr_array->refresh_power = uca->refresh_power;
+ ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page;
+ ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page;
+ ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks;
+
+ ptr_array->precharge_delay = uca->precharge_delay;
+
+
+// cout<<"power_matchline.searchOp.leakage"<bank.mat.<bank.mat.subarray.get_total_cell_area()<min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
+ return false;
+ }
+ if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev) {
+ return false;
+ }
+ if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
+ g_ip->leakage_power_dev) {
+ return false;
+ }
+ if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
+ g_ip->cycle_time_dev) {
+ return false;
+ }
+ if (((u.area - minval->min_area)/minval->min_area)*100 >
+ g_ip->area_dev) {
+ return false;
+ }
+ return true;
+}
+
+bool check_mem_org(mem_array & u, const min_values_t *minval)
+{
+ if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
+ return false;
+ }
+ if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
+ g_ip->dynamic_power_dev) {
+ return false;
+ }
+ if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
+ g_ip->leakage_power_dev) {
+ return false;
+ }
+ if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 >
+ g_ip->cycle_time_dev) {
+ return false;
+ }
+ if (((u.area - minval->min_area)/minval->min_area)*100 >
+ g_ip->area_dev) {
+ return false;
+ }
+ return true;
+}
+
+
+
+
+void find_optimal_uca(uca_org_t *res, min_values_t * minval, list & ulist)
+{
+ double cost = 0;
+ double min_cost = BIGNUM;
+ float d, a, dp, lp, c;
+
+ dp = g_ip->dynamic_power_wt;
+ lp = g_ip->leakage_power_wt;
+ a = g_ip->area_wt;
+ d = g_ip->delay_wt;
+ c = g_ip->cycle_time_wt;
+
+ if (ulist.empty() == true)
+ {
+ cout << "ERROR: no valid cache organizations found" << endl;
+ exit(0);
+ }
+
+ for (list::iterator niter = ulist.begin(); niter != ulist.end(); niter++)
+ {
+ if (g_ip->ed == 1)
+ {
+ cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn);
+ if (min_cost > cost)
+ {
+ min_cost = cost;
+ *res = (*(niter));
+ }
+ }
+ else if (g_ip->ed == 2)
+ {
+ cost = ((niter)->access_time/minval->min_delay)*
+ ((niter)->access_time/minval->min_delay)*
+ ((niter)->power.readOp.dynamic/minval->min_dyn);
+ if (min_cost > cost)
+ {
+ min_cost = cost;
+ *res = (*(niter));
+ }
+ }
+ else
+ {
+ /*
+ * check whether the current organization
+ * meets the input deviation constraints
+ */
+ bool v = check_uca_org(*niter, minval);
+ //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
+
+ if (v)
+ {
+ cost = (d * ((niter)->access_time/minval->min_delay) +
+ c * ((niter)->cycle_time/minval->min_cyc) +
+ dp * ((niter)->power.readOp.dynamic/minval->min_dyn) +
+ lp * ((niter)->power.readOp.leakage/minval->min_leakage) +
+ a * ((niter)->area/minval->min_area));
+ //fprintf(stderr, "cost = %g\n", cost);
+
+ if (min_cost > cost) {
+ min_cost = cost;
+ *res = (*(niter));
+ niter = ulist.erase(niter);
+ if (niter!=ulist.begin())
+ niter--;
+ }
+ }
+ else {
+ niter = ulist.erase(niter);
+ if (niter!=ulist.begin())
+ niter--;
+ }
+ }
+ }
+
+ if (min_cost == BIGNUM)
+ {
+ cout << "ERROR: no cache organizations met optimization criteria" << endl;
+ exit(0);
+ }
+}
+
+
+
+void filter_tag_arr(const min_values_t * min, list & list)
+{
+ double cost = BIGNUM;
+ double cur_cost;
+ double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt;
+ mem_array * res = NULL;
+
+ if (list.empty() == true)
+ {
+ cout << "ERROR: no valid tag organizations found" << endl;
+ exit(1);
+ }
+
+
+ while (list.empty() != true)
+ {
+ bool v = check_mem_org(*list.back(), min);
+ if (v)
+ {
+ cur_cost = wt_delay * (list.back()->access_time/min->min_delay) +
+ wt_dyn * (list.back()->power.readOp.dynamic/min->min_dyn) +
+ wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) +
+ wt_area * (list.back()->area/min->min_area) +
+ wt_cyc * (list.back()->cycle_time/min->min_cyc);
+ }
+ else
+ {
+ cur_cost = BIGNUM;
+ }
+ if (cur_cost < cost)
+ {
+ if (res != NULL)
+ {
+ delete res;
+ }
+ cost = cur_cost;
+ res = list.back();
+ }
+ else
+ {
+ delete list.back();
+ }
+ list.pop_back();
+ }
+ if(!res)
+ {
+ cout << "ERROR: no valid tag organizations found" << endl;
+ exit(0);
+ }
+
+ list.push_back(res);
+}
+
+
+
+void filter_data_arr(list & curr_list)
+{
+ if (curr_list.empty() == true)
+ {
+ cout << "ERROR: no valid data array organizations found" << endl;
+ exit(1);
+ }
+
+ list::iterator iter;
+
+ for (iter = curr_list.begin(); iter != curr_list.end(); ++iter)
+ {
+ mem_array * m = *iter;
+
+ if (m == NULL) exit(1);
+
+ if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) &&
+ ((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5))
+ {
+ delete m;
+ iter = curr_list.erase(iter);
+ iter --;
+ }
+ }
+}
+
+
+
+/*
+ * Performs exhaustive search across different sub-array sizes,
+ * wire types and aspect ratios to find an optimal UCA organization
+ * 1. First different valid tag array organizations are calculated
+ * and stored in tag_arr array
+ * 2. The exhaustive search is repeated to find valid data array
+ * organizations and stored in data_arr array
+ * 3. Cache area, delay, power, and cycle time for different
+ * cache organizations are calculated based on the
+ * above results
+ * 4. Cache model with least cost is picked from sol_list
+ */
+void solve(uca_org_t *fin_res)
+{
+ bool is_dram = false;
+ int pure_ram = g_ip->pure_ram;
+ bool pure_cam = g_ip->pure_cam;
+
+ init_tech_params(g_ip->F_sz_um, false);
+
+
+ list tag_arr (0);
+ list data_arr(0);
+ list::iterator miter;
+ list sol_list(1, uca_org_t());
+
+ fin_res->tag_array.access_time = 0;
+ fin_res->tag_array.Ndwl = 0;
+ fin_res->tag_array.Ndbl = 0;
+ fin_res->tag_array.Nspd = 0;
+ fin_res->tag_array.deg_bl_muxing = 0;
+ fin_res->tag_array.Ndsam_lev_1 = 0;
+ fin_res->tag_array.Ndsam_lev_2 = 0;
+
+
+ // distribute calculate_time() execution to multiple threads
+ calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads];
+ pthread_t threads[nthreads];
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ calc_array[t].tid = t;
+ calc_array[t].pure_ram = pure_ram;
+ calc_array[t].pure_cam = pure_cam;
+ calc_array[t].data_res = new min_values_t();
+ calc_array[t].tag_res = new min_values_t();
+ }
+
+ bool is_tag;
+ uint32_t ram_cell_tech_type;
+
+ // If it's a cache, first calculate the area, delay and power for all tag array partitions.
+ if (!(pure_ram||pure_cam||g_ip->fully_assoc))
+ { //cache
+ is_tag = true;
+ ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type;
+ is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
+ init_tech_params(g_ip->F_sz_um, is_tag);
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ calc_array[t].is_tag = is_tag;
+ calc_array[t].is_main_mem = false;
+ calc_array[t].Nspd_min = 0.125;
+ pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
+ }
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ pthread_join(threads[t], NULL);
+ }
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ calc_array[t].data_arr.sort(mem_array::lt);
+ data_arr.merge(calc_array[t].data_arr, mem_array::lt);
+ calc_array[t].tag_arr.sort(mem_array::lt);
+ tag_arr.merge(calc_array[t].tag_arr, mem_array::lt);
+ }
+ }
+
+
+ // calculate the area, delay and power for all data array partitions (for cache or plain RAM).
+// if (!g_ip->fully_assoc)
+// {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion
+ is_tag = false;
+ ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type;
+ is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
+ init_tech_params(g_ip->F_sz_um, is_tag);
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ calc_array[t].is_tag = is_tag;
+ calc_array[t].is_main_mem = g_ip->is_main_mem;
+ if (!(pure_cam||g_ip->fully_assoc))
+ {
+ calc_array[t].Nspd_min = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8);
+ }
+ else
+ {
+ calc_array[t].Nspd_min = 1;
+ }
+
+ pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
+ }
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ pthread_join(threads[t], NULL);
+ }
+
+ data_arr.clear();
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ calc_array[t].data_arr.sort(mem_array::lt);
+ data_arr.merge(calc_array[t].data_arr, mem_array::lt);
+ }
+// }
+
+
+ min_values_t * d_min = new min_values_t();
+ min_values_t * t_min = new min_values_t();
+ min_values_t * cache_min = new min_values_t();
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ d_min->update_min_values(calc_array[t].data_res);
+ t_min->update_min_values(calc_array[t].tag_res);
+ }
+
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
+ {
+ (*miter)->arr_min = d_min;
+ }
+
+
+ //cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n";
+ filter_data_arr(data_arr);
+ if(!(pure_ram||pure_cam||g_ip->fully_assoc))
+ {
+ filter_tag_arr(t_min, tag_arr);
+ }
+ //cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n";
+
+
+ if (pure_ram||pure_cam||g_ip->fully_assoc)
+ {
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
+ {
+ uca_org_t & curr_org = sol_list.back();
+ curr_org.tag_array2 = NULL;
+ curr_org.data_array2 = (*miter);
+
+ curr_org.find_delay();
+ curr_org.find_energy();
+ curr_org.find_area();
+ curr_org.find_cyc();
+
+ //update min values for the entire cache
+ cache_min->update_min_values(curr_org);
+
+ sol_list.push_back(uca_org_t());
+ }
+ }
+ else
+ {
+ while (tag_arr.empty() != true)
+ {
+ mem_array * arr_temp = (tag_arr.back());
+ //delete tag_arr.back();
+ tag_arr.pop_back();
+
+ for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
+ {
+ uca_org_t & curr_org = sol_list.back();
+ curr_org.tag_array2 = arr_temp;
+ curr_org.data_array2 = (*miter);
+
+ curr_org.find_delay();
+ curr_org.find_energy();
+ curr_org.find_area();
+ curr_org.find_cyc();
+
+ //update min values for the entire cache
+ cache_min->update_min_values(curr_org);
+
+ sol_list.push_back(uca_org_t());
+ }
+ }
+ }
+
+ sol_list.pop_back();
+
+ find_optimal_uca(fin_res, cache_min, sol_list);
+
+ sol_list.clear();
+
+ for (miter = data_arr.begin(); miter != data_arr.end(); ++miter)
+ {
+ if (*miter != fin_res->data_array2)
+ {
+ delete *miter;
+ }
+ }
+ data_arr.clear();
+
+ for (uint32_t t = 0; t < nthreads; t++)
+ {
+ delete calc_array[t].data_res;
+ delete calc_array[t].tag_res;
+ }
+
+ delete [] calc_array;
+ delete cache_min;
+ delete d_min;
+ delete t_min;
+}
+
+void update(uca_org_t *fin_res)
+{
+ if(fin_res->tag_array2)
+ {
+ init_tech_params(g_ip->F_sz_um,true);
+ DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem);
+ if(tag_arr_dyn_p.is_valid)
+ {
+ UCA * tag_arr = new UCA(tag_arr_dyn_p);
+ fin_res->tag_array2->power = tag_arr->power;
+ }
+ else
+ {
+ cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
+ exit(1);
+ }
+ }
+ init_tech_params(g_ip->F_sz_um,false);
+ DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem);
+ if(data_arr_dyn_p.is_valid)
+ {
+ UCA * data_arr = new UCA(data_arr_dyn_p);
+ fin_res->data_array2->power = data_arr->power;
+ }
+ else
+ {
+ cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
+ exit(1);
+ }
+
+ fin_res->find_energy();
+}
+
diff --git a/ext/mcpat/cacti/Ucache.h b/ext/mcpat/cacti/Ucache.h
new file mode 100644
index 000000000..20985fff1
--- /dev/null
+++ b/ext/mcpat/cacti/Ucache.h
@@ -0,0 +1,115 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+#ifndef __UCACHE_H__
+#define __UCACHE_H__
+
+#include
+
+#include "area.h"
+#include "nuca.h"
+#include "router.h"
+
+class min_values_t
+{
+ public:
+ double min_delay;
+ double min_dyn;
+ double min_leakage;
+ double min_area;
+ double min_cyc;
+
+ min_values_t() : min_delay(BIGNUM), min_dyn(BIGNUM), min_leakage(BIGNUM), min_area(BIGNUM), min_cyc(BIGNUM) { }
+
+ void update_min_values(const min_values_t * val);
+ void update_min_values(const uca_org_t & res);
+ void update_min_values(const nuca_org_t * res);
+ void update_min_values(const mem_array * res);
+};
+
+
+
+struct solution
+{
+ int tag_array_index;
+ int data_array_index;
+ list::iterator tag_array_iter;
+ list::iterator data_array_iter;
+ double access_time;
+ double cycle_time;
+ double area;
+ double efficiency;
+ powerDef total_power;
+};
+
+
+
+bool calculate_time(
+ bool is_tag,
+ int pure_ram,
+ bool pure_cam,
+ double Nspd,
+ unsigned int Ndwl,
+ unsigned int Ndbl,
+ unsigned int Ndcm,
+ unsigned int Ndsam_lev_1,
+ unsigned int Ndsam_lev_2,
+ mem_array *ptr_array,
+ int flag_results_populate,
+ results_mem_array *ptr_results,
+ uca_org_t *ptr_fin_res,
+ bool is_main_mem);
+void update(uca_org_t *fin_res);
+
+void solve(uca_org_t *fin_res);
+void init_tech_params(double tech, bool is_tag);
+
+
+struct calc_time_mt_wrapper_struct
+{
+ uint32_t tid;
+ bool is_tag;
+ bool pure_ram;
+ bool pure_cam;
+ bool is_main_mem;
+ double Nspd_min;
+
+ min_values_t * data_res;
+ min_values_t * tag_res;
+
+ list data_arr;
+ list tag_arr;
+};
+
+void *calc_time_mt_wrapper(void * void_obj);
+
+#endif
diff --git a/ext/mcpat/cacti/arbiter.cc b/ext/mcpat/cacti/arbiter.cc
new file mode 100644
index 000000000..6664abf13
--- /dev/null
+++ b/ext/mcpat/cacti/arbiter.cc
@@ -0,0 +1,130 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#include "arbiter.h"
+
+Arbiter::Arbiter(
+ double n_req,
+ double flit_size_,
+ double output_len,
+ TechnologyParameter::DeviceType *dt
+ ):R(n_req), flit_size(flit_size_),
+ o_len (output_len), deviceType(dt)
+{
+ min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
+ Vdd = dt->Vdd;
+ double technology = g_ip->F_sz_um;
+ NTn1 = 13.5*technology/2;
+ PTn1 = 76*technology/2;
+ NTn2 = 13.5*technology/2;
+ PTn2 = 76*technology/2;
+ NTi = 12.5*technology/2;
+ PTi = 25*technology/2;
+ NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/
+ PTtr = 20*technology/2; /* pmos tr. length*/
+}
+
+Arbiter::~Arbiter(){}
+
+double
+Arbiter::arb_req() {
+ double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) +
+ gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
+ drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
+ return temp;
+}
+
+double
+Arbiter::arb_pri() {
+ double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance
+ of flip-flop is ignored */
+ return temp;
+}
+
+
+double
+Arbiter::arb_grant() {
+ double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
+ return temp;
+}
+
+double
+Arbiter::arb_int() {
+ double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
+ 2*gate_C(NTn2, 0) + gate_C(PTn2, 0));
+ return temp;
+}
+
+void
+Arbiter::compute_power() {
+ power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 +
+ arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd);
+ double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
+ double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
+ double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
+ double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
+ double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
+ double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
+ power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage
+ power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd;
+}
+
+double //wire cap with triple spacing
+Arbiter::Cw3(double length) {
+ Wire wc(g_ip->wt, length, 1, 3, 3);
+ double temp = (wc.wire_cap(length,true));
+ return temp;
+}
+
+double
+Arbiter::crossbar_ctrline() {
+ double temp = (Cw3(o_len * 1e-6 /* m */) +
+ drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) +
+ gate_C(NTi, 0) + gate_C(PTi, 0));
+ return temp;
+}
+
+double
+Arbiter::transmission_buf_ctrcap() {
+ double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0);
+ return temp;
+}
+
+
+void Arbiter::print_arbiter()
+{
+ cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
+ cout << "Flit size : " << flit_size << " bits" << endl;
+ cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
+ cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
+}
+
+
diff --git a/ext/mcpat/cacti/arbiter.h b/ext/mcpat/cacti/arbiter.h
new file mode 100644
index 000000000..32ada92c2
--- /dev/null
+++ b/ext/mcpat/cacti/arbiter.h
@@ -0,0 +1,79 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+#ifndef __ARBITER__
+#define __ARBITER__
+
+#include
+
+#include
+
+#include "basic_circuit.h"
+#include "cacti_interface.h"
+#include "component.h"
+#include "mat.h"
+#include "parameter.h"
+#include "wire.h"
+
+class Arbiter : public Component
+{
+ public:
+ Arbiter(
+ double Req,
+ double flit_sz,
+ double output_len,
+ TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
+ ~Arbiter();
+
+ void print_arbiter();
+ double arb_req();
+ double arb_pri();
+ double arb_grant();
+ double arb_int();
+ void compute_power();
+ double Cw3(double len);
+ double crossbar_ctrline();
+ double transmission_buf_ctrcap();
+
+
+
+ private:
+ double NTn1, PTn1, NTn2, PTn2, R, PTi, NTi;
+ double flit_size;
+ double NTtr, PTtr;
+ double o_len;
+ TechnologyParameter::DeviceType *deviceType;
+ double TriS1, TriS2;
+ double min_w_pmos, Vdd;
+
+};
+
+#endif
diff --git a/ext/mcpat/cacti/area.cc b/ext/mcpat/cacti/area.cc
new file mode 100644
index 000000000..14ea4a9d7
--- /dev/null
+++ b/ext/mcpat/cacti/area.cc
@@ -0,0 +1,47 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#include
+#include
+#include
+
+#include "area.h"
+#include "basic_circuit.h"
+#include "component.h"
+#include "decoder.h"
+#include "parameter.h"
+
+using namespace std;
+
+
+
diff --git a/ext/mcpat/cacti/area.h b/ext/mcpat/cacti/area.h
new file mode 100644
index 000000000..7705e6250
--- /dev/null
+++ b/ext/mcpat/cacti/area.h
@@ -0,0 +1,71 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __AREA_H__
+#define __AREA_H__
+
+#include "basic_circuit.h"
+#include "cacti_interface.h"
+
+using namespace std;
+
+class Area
+{
+ public:
+ double w;
+ double h;
+
+ Area():w(0), h(0), area(0) { }
+ double get_w() const { return w; }
+ double get_h() const { return h; }
+ double get_area() const
+ {
+ if (w == 0 && h == 0)
+ {
+ return area;
+ }
+ else
+ {
+ return w*h;
+ }
+ }
+ void set_w(double w_) { w = w_; }
+ void set_h(double h_) { h = h_; }
+ void set_area(double a_) { area = a_; }
+
+ private:
+ double area;
+};
+
+#endif
+
diff --git a/ext/mcpat/cacti/bank.cc b/ext/mcpat/cacti/bank.cc
new file mode 100755
index 000000000..a18c7f1ed
--- /dev/null
+++ b/ext/mcpat/cacti/bank.cc
@@ -0,0 +1,198 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#include
+
+#include "bank.h"
+
+Bank::Bank(const DynamicParameter & dyn_p):
+ dp(dyn_p), mat(dp),
+ num_addr_b_mat(dyn_p.number_addr_bits_mat),
+ num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir)
+{
+ int RWP;
+ int ERP;
+ int EWP;
+ int SCHP;
+
+ if (dp.use_inp_params)
+ {
+ RWP = dp.num_rw_ports;
+ ERP = dp.num_rd_ports;
+ EWP = dp.num_wr_ports;
+ SCHP = dp.num_search_ports;
+ }
+ else
+ {
+ RWP = g_ip->num_rw_ports;
+ ERP = g_ip->num_rd_ports;
+ EWP = g_ip->num_wr_ports;
+ SCHP = g_ip->num_search_ports;
+ }
+
+ int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
+ int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
+ int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
+ int searchinbits;
+ int searchoutbits;
+
+ if (dp.fully_assoc || dp.pure_cam)
+ {
+ datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
+ dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
+ searchinbits = dp.num_si_b_bank_per_port * SCHP;
+ searchoutbits = dp.num_so_b_bank_per_port * SCHP;
+ }
+
+ if (!(dp.fully_assoc || dp.pure_cam))
+ {
+ if (g_ip->fast_access && dp.is_tag == false)
+ {
+ dataoutbits *= g_ip->data_assoc;
+ }
+
+ htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
+ htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
+ htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
+
+// htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100,
+// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
+
+ area.w = htree_in_data->area.w;
+ area.h = htree_in_data->area.h;
+ }
+ else
+ {
+ htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits, searchinbits,dataoutbits,searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
+ htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
+ htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
+ htree_in_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree,true, true);
+ htree_out_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
+ total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree,true);
+
+ area.w = htree_in_data->area.w;
+ area.h = htree_in_data->area.h;
+ }
+
+ num_addr_b_row_dec = _log2(mat.subarray.num_rows);
+ num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec;
+ num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec;
+}
+
+
+
+Bank::~Bank()
+{
+ delete htree_in_add;
+ delete htree_out_data;
+ delete htree_in_data;
+ if (dp.fully_assoc || dp.pure_cam)
+ {
+ delete htree_in_search;
+ delete htree_out_search;
+ }
+}
+
+
+
+double Bank::compute_delays(double inrisetime)
+{
+ return mat.compute_delays(inrisetime);
+}
+
+
+
+void Bank::compute_power_energy()
+{
+ mat.compute_power_energy();
+
+ if (!(dp.fully_assoc || dp.pure_cam))
+ {
+ power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
+ power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
+ power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
+
+ power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
+ power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
+
+ power.readOp.leakage += htree_in_add->power.readOp.leakage;
+ power.readOp.leakage += htree_in_data->power.readOp.leakage;
+ power.readOp.leakage += htree_out_data->power.readOp.leakage;
+ power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
+ }
+ else
+ {
+
+ power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
+ power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
+ power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
+
+ power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats;
+ power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic +
+ mat.power_sa.searchOp.dynamic +
+ mat.power_bitline.searchOp.dynamic +
+ mat.power_subarray_out_drv.searchOp.dynamic+
+ mat.ml_to_ram_wl_drv->power.readOp.dynamic;
+
+ power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
+ power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
+
+ power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic;
+ power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic;
+
+ power.readOp.leakage += htree_in_add->power.readOp.leakage;
+ power.readOp.leakage += htree_in_data->power.readOp.leakage;
+ power.readOp.leakage += htree_out_data->power.readOp.leakage;
+ power.readOp.leakage += htree_in_search->power.readOp.leakage;
+ power.readOp.leakage += htree_out_search->power.readOp.leakage;
+
+
+ power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage;
+ power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage;
+
+ }
+
+}
+
diff --git a/ext/mcpat/cacti/bank.h b/ext/mcpat/cacti/bank.h
new file mode 100755
index 000000000..153609ab0
--- /dev/null
+++ b/ext/mcpat/cacti/bank.h
@@ -0,0 +1,69 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __BANK_H__
+#define __BANK_H__
+
+#include "component.h"
+#include "decoder.h"
+#include "htree2.h"
+#include "mat.h"
+
+class Bank : public Component
+{
+ public:
+ Bank(const DynamicParameter & dyn_p);
+ ~Bank();
+ double compute_delays(double inrisetime); // return outrisetime
+ void compute_power_energy();
+
+ const DynamicParameter & dp;
+ Mat mat;
+ Htree2 *htree_in_add;
+ Htree2 *htree_in_data;
+ Htree2 *htree_out_data;
+ Htree2 *htree_in_search;
+ Htree2 *htree_out_search;
+
+ int num_addr_b_mat;
+ int num_mats_hor_dir;
+ int num_mats_ver_dir;
+
+ int num_addr_b_row_dec;
+ int num_addr_b_routed_to_mat_for_act;
+ int num_addr_b_routed_to_mat_for_rd_or_wr;
+};
+
+
+
+#endif
diff --git a/ext/mcpat/cacti/basic_circuit.cc b/ext/mcpat/cacti/basic_circuit.cc
new file mode 100644
index 000000000..6efd5dd27
--- /dev/null
+++ b/ext/mcpat/cacti/basic_circuit.cc
@@ -0,0 +1,829 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+
+#include
+#include
+#include
+
+#include "basic_circuit.h"
+#include "parameter.h"
+
+uint32_t _log2(uint64_t num)
+{
+ uint32_t log2 = 0;
+
+ if (num == 0)
+ {
+ std::cerr << "log0?" << std::endl;
+ exit(1);
+ }
+
+ while (num > 1)
+ {
+ num = (num >> 1);
+ log2++;
+ }
+
+ return log2;
+}
+
+
+bool is_pow2(int64_t val)
+{
+ if (val <= 0)
+ {
+ return false;
+ }
+ else if (val == 1)
+ {
+ return true;
+ }
+ else
+ {
+ return (_log2(val) != _log2(val-1));
+ }
+}
+
+
+int powers (int base, int n)
+{
+ int i, p;
+
+ p = 1;
+ for (i = 1; i <= n; ++i)
+ p *= base;
+ return p;
+}
+
+/*----------------------------------------------------------------------*/
+
+double logtwo (double x)
+{
+ assert(x > 0);
+ return ((double) (log (x) / log (2.0)));
+}
+
+/*----------------------------------------------------------------------*/
+
+
+double gate_C(
+ double width,
+ double wirelength,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ const TechnologyParameter::DeviceType * dt;
+
+ if (_is_dram && _is_cell)
+ {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ }
+ else if (_is_dram && _is_wl_tr)
+ {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ }
+ else if (!_is_dram && _is_cell)
+ {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ }
+ else
+ {
+ dt = &g_tp.peri_global;
+ }
+
+ return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
+}
+
+
+// returns gate capacitance in Farads
+// actually this function is the same as gate_C() now
+double gate_C_pass(
+ double width, // gate width in um (length is Lphy_periph_global)
+ double wirelength, // poly wire length going to gate in lambda
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ // v5.0
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell))
+ {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ }
+ else if ((_is_dram) && (_is_wl_tr))
+ {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ }
+ else if ((!_is_dram) && _is_cell)
+ {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ }
+ else
+ {
+ dt = &g_tp.peri_global;
+ }
+
+ return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
+}
+
+
+
+double drain_C_(
+ double width,
+ int nchannel,
+ int stack,
+ int next_arg_thresh_folding_width_or_height_cell,
+ double fold_dimension,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ double w_folded_tr;
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell))
+ {
+ dt = &g_tp.dram_acc; // DRAM cell access transistor
+ }
+ else if ((_is_dram) && (_is_wl_tr))
+ {
+ dt = &g_tp.dram_wl; // DRAM wordline transistor
+ }
+ else if ((!_is_dram) && _is_cell)
+ {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ }
+ else
+ {
+ dt = &g_tp.peri_global;
+ }
+
+ double c_junc_area = dt->C_junc;
+ double c_junc_sidewall = dt->C_junc_sidewall;
+ double c_fringe = 2*dt->C_fringe;
+ double c_overlap = 2*dt->C_overlap;
+ double drain_C_metal_connecting_folded_tr = 0;
+
+ // determine the width of the transistor after folding (if it is getting folded)
+ if (next_arg_thresh_folding_width_or_height_cell == 0)
+ { // interpret fold_dimension as the the folding width threshold
+ // i.e. the value of transistor width above which the transistor gets folded
+ w_folded_tr = fold_dimension;
+ }
+ else
+ { // interpret fold_dimension as the height of the cell that this transistor is part of.
+ double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL;
+ // TODO : w_folded_tr must come from Component::compute_gate_area()
+ double ratio_p_to_n = 2.0 / (2.0 + 1.0);
+ if (nchannel)
+ {
+ w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+ }
+ else
+ {
+ w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
+ }
+ }
+ int num_folded_tr = (int) (ceil(width / w_folded_tr));
+
+ if (num_folded_tr < 2)
+ {
+ w_folded_tr = width;
+ }
+
+ double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain
+ (stack - 1) * g_tp.spacing_poly_to_poly;
+ double drain_h_for_sidewall = w_folded_tr;
+ double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1);
+ if (num_folded_tr > 1)
+ {
+ total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +
+ (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly);
+
+ if (num_folded_tr%2 == 0)
+ {
+ drain_h_for_sidewall = 0;
+ }
+ total_drain_height_for_cap_wrt_gate *= num_folded_tr;
+ drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w;
+ }
+
+ double drain_C_area = c_junc_area * total_drain_w * w_folded_tr;
+ double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w);
+ double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate;
+
+ return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr);
+}
+
+
+double tr_R_on(
+ double width,
+ int nchannel,
+ int stack,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell))
+ {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ }
+ else if ((_is_dram) && (_is_wl_tr))
+ {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ }
+ else if ((!_is_dram) && _is_cell)
+ {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ }
+ else
+ {
+ dt = &g_tp.peri_global;
+ }
+
+ double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
+ return (stack * restrans / width);
+}
+
+
+/* This routine operates in reverse: given a resistance, it finds
+ * the transistor width that would have this R. It is used in the
+ * data wordline to estimate the wordline driver size. */
+
+// returns width in um
+double R_to_w(
+ double res,
+ int nchannel,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ const TechnologyParameter::DeviceType * dt;
+
+ if ((_is_dram) && (_is_cell))
+ {
+ dt = &g_tp.dram_acc; //DRAM cell access transistor
+ }
+ else if ((_is_dram) && (_is_wl_tr))
+ {
+ dt = &g_tp.dram_wl; //DRAM wordline transistor
+ }
+ else if ((!_is_dram) && (_is_cell))
+ {
+ dt = &g_tp.sram_cell; // SRAM cell access transistor
+ }
+ else
+ {
+ dt = &g_tp.peri_global;
+ }
+
+ double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
+ return (restrans / res);
+}
+
+
+double pmos_to_nmos_sz_ratio(
+ bool _is_dram,
+ bool _is_wl_tr)
+{
+ double p_to_n_sizing_ratio;
+ if ((_is_dram) && (_is_wl_tr))
+ { //DRAM wordline transistor
+ p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
+ }
+ else
+ { //DRAM or SRAM all other transistors
+ p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
+ }
+ return p_to_n_sizing_ratio;
+}
+
+
+// "Timing Models for MOS Circuits" by Mark Horowitz, 1984
+double horowitz(
+ double inputramptime, // input rise time
+ double tf, // time constant of gate
+ double vs1, // threshold voltage
+ double vs2, // threshold voltage
+ int rise) // whether input rises or fall
+{
+ if (inputramptime == 0 && vs1 == vs2)
+ {
+ return tf * (vs1 < 1 ? -log(vs1) : log(vs1));
+ }
+ double a, b, td;
+
+ a = inputramptime / tf;
+ if (rise == RISE)
+ {
+ b = 0.5;
+ td = tf * sqrt(log(vs1)*log(vs1) + 2*a*b*(1.0 - vs1)) + tf*(log(vs1) - log(vs2));
+ }
+ else
+ {
+ b = 0.4;
+ td = tf * sqrt(log(1.0 - vs1)*log(1.0 - vs1) + 2*a*b*(vs1)) + tf*(log(1.0 - vs1) - log(1.0 - vs2));
+ }
+ return (td);
+}
+
+double cmos_Ileak(
+ double nWidth,
+ double pWidth,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram)&&(_is_cell))
+ { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ }
+ else if ((_is_dram)&&(_is_wl_tr))
+ { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ }
+ else
+ { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return nWidth*dt->I_off_n + pWidth*dt->I_off_p;
+}
+
+
+double simplified_nmos_leakage(
+ double nwidth,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram)&&(_is_cell))
+ { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ }
+ else if ((_is_dram)&&(_is_wl_tr))
+ { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ }
+ else
+ { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return nwidth * dt->I_off_n;
+}
+
+int factorial(int n, int m)
+{
+ int fa = m, i;
+ for (i=m+1; i<=n; i++)
+ fa *=i;
+ return fa;
+}
+
+int combination(int n, int m)
+{
+ int ret;
+ ret = factorial(n, m+1) / factorial(n - m);
+ return ret;
+}
+
+double simplified_pmos_leakage(
+ double pwidth,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram)&&(_is_cell))
+ { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ }
+ else if ((_is_dram)&&(_is_wl_tr))
+ { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ }
+ else
+ { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return pwidth * dt->I_off_p;
+}
+
+double cmos_Ig_n(
+ double nWidth,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram)&&(_is_cell))
+ { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ }
+ else if ((_is_dram)&&(_is_wl_tr))
+ { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ }
+ else
+ { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return nWidth*dt->I_g_on_n;
+}
+
+double cmos_Ig_p(
+ double pWidth,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr)
+{
+ TechnologyParameter::DeviceType * dt;
+
+ if ((!_is_dram)&&(_is_cell))
+ { //SRAM cell access transistor
+ dt = &(g_tp.sram_cell);
+ }
+ else if ((_is_dram)&&(_is_wl_tr))
+ { //DRAM wordline transistor
+ dt = &(g_tp.dram_wl);
+ }
+ else
+ { //DRAM or SRAM all other transistors
+ dt = &(g_tp.peri_global);
+ }
+ return pWidth*dt->I_g_on_p;
+}
+
+double cmos_Isub_leakage(
+ double nWidth,
+ double pWidth,
+ int fanin,
+ enum Gate_type g_type,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr,
+ enum Half_net_topology topo)
+{
+ assert (fanin>=1);
+ double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr);
+ double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr);
+ double Isub=0;
+ int num_states;
+ int num_off_tx;
+
+ num_states = int(pow(2.0, fanin));
+
+ switch (g_type)
+ {
+ case nmos:
+ if (fanin==1)
+ {
+ Isub = nmos_leak/num_states;
+ }
+ else
+ {
+ if (topo==parallel)
+ {
+ Isub=nmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
+ }
+ else
+ {
+ for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
+ {
+ //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
+ Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+ }
+ Isub /=num_states;
+ }
+
+ }
+ break;
+ case pmos:
+ if (fanin==1)
+ {
+ Isub = pmos_leak/num_states;
+ }
+ else
+ {
+ if (topo==parallel)
+ {
+ Isub=pmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
+ }
+ else
+ {
+ for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
+ {
+ //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
+ Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+ }
+ Isub /=num_states;
+ }
+
+ }
+ break;
+ case inv:
+ Isub = (nmos_leak + pmos_leak)/2;
+ break;
+ case nand:
+ Isub += fanin*pmos_leak;//the pullup network
+ for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pulldown network
+ {
+ //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
+ Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+ }
+ Isub /=num_states;
+ break;
+ case nor:
+ for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pullup network
+ {
+ //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
+ Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
+ }
+ Isub += fanin*nmos_leak;//the pulldown network
+ Isub /=num_states;
+ break;
+ case tri:
+ Isub += (nmos_leak + pmos_leak)/2;//enabled
+ Isub += nmos_leak*UNI_LEAK_STACK_FACTOR; //disabled upper bound of leakage power
+ Isub /=2;
+ break;
+ case tg:
+ Isub = (nmos_leak + pmos_leak)/2;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ return Isub;
+}
+
+
+double cmos_Ig_leakage(
+ double nWidth,
+ double pWidth,
+ int fanin,
+ enum Gate_type g_type,
+ bool _is_dram,
+ bool _is_cell,
+ bool _is_wl_tr,
+ enum Half_net_topology topo)
+{
+ assert (fanin>=1);
+ double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr);
+ double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr);
+ double Ig_on=0;
+ int num_states;
+ int num_on_tx;
+
+ num_states = int(pow(2.0, fanin));
+
+ switch (g_type)
+ {
+ case nmos:
+ if (fanin==1)
+ {
+ Ig_on = nmos_leak/num_states;
+ }
+ else
+ {
+ if (topo==parallel)
+ {
+ for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
+ {
+ Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
+ }
+ }
+ else
+ {
+ Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
+ //num_on_tx is the number of on tx
+ for (num_on_tx=1; num_on_tx
+
+#include
+#include
+#include
+#include
+
+#include "Ucache.h"
+#include "area.h"
+#include "basic_circuit.h"
+#include "cacti_interface.h"
+#include "component.h"
+#include "const.h"
+#include "parameter.h"
+
+using namespace std;
+
+
+bool mem_array::lt(const mem_array * m1, const mem_array * m2)
+{
+ if (m1->Nspd < m2->Nspd) return true;
+ else if (m1->Nspd > m2->Nspd) return false;
+ else if (m1->Ndwl < m2->Ndwl) return true;
+ else if (m1->Ndwl > m2->Ndwl) return false;
+ else if (m1->Ndbl < m2->Ndbl) return true;
+ else if (m1->Ndbl > m2->Ndbl) return false;
+ else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true;
+ else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false;
+ else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true;
+ else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false;
+ else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true;
+ else return false;
+}
+
+
+
+void uca_org_t::find_delay()
+{
+ mem_array * data_arr = data_array2;
+ mem_array * tag_arr = tag_array2;
+
+ // check whether it is a regular cache or scratch ram
+ if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
+ {
+ access_time = data_arr->access_time;
+ }
+ // Both tag and data lookup happen in parallel
+ // and the entire set is sent over the data array h-tree without
+ // waiting for the way-select signal --TODO add the corresponding
+ // power overhead Nav
+ else if (g_ip->fast_access == true)
+ {
+ access_time = MAX(tag_arr->access_time, data_arr->access_time);
+ }
+ // Tag is accessed first. On a hit, way-select signal along with the
+ // address is sent to read/write the appropriate block in the data
+ // array
+ else if (g_ip->is_seq_acc == true)
+ {
+ access_time = tag_arr->access_time + data_arr->access_time;
+ }
+ // Normal access: tag array access and data array access happen in parallel.
+ // But, the data array will wait for the way-select and transfer only the
+ // appropriate block over the h-tree.
+ else
+ {
+ access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder,
+ data_arr->delay_before_subarray_output_driver) +
+ data_arr->delay_from_subarray_output_driver_to_output;
+ }
+}
+
+
+
+void uca_org_t::find_energy()
+{
+ if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache)
+ power = data_array2->power + tag_array2->power;
+ else
+ power = data_array2->power;
+}
+
+
+
+void uca_org_t::find_area()
+{
+ if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)//(g_ip->is_cache == false)
+ {
+ cache_ht = data_array2->height;
+ cache_len = data_array2->width;
+ }
+ else
+ {
+ cache_ht = MAX(tag_array2->height, data_array2->height);
+ cache_len = tag_array2->width + data_array2->width;
+ }
+ area = cache_ht * cache_len;
+}
+
+void uca_org_t::adjust_area()
+{
+ double area_adjust;
+ if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
+ {
+ if (data_array2->area_efficiency/100.0<0.2)
+ {
+ //area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2));
+ area_adjust = sqrt(0.2/(data_array2->area_efficiency/100.0));
+ cache_ht = cache_ht/area_adjust;
+ cache_len = cache_len/area_adjust;
+ }
+ }
+ area = cache_ht * cache_len;
+}
+
+void uca_org_t::find_cyc()
+{
+ if ((g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache == false)
+ {
+ cycle_time = data_array2->cycle_time;
+ }
+ else
+ {
+ cycle_time = MAX(tag_array2->cycle_time,
+ data_array2->cycle_time);
+ }
+}
+
+uca_org_t :: uca_org_t()
+:tag_array2(0),
+ data_array2(0)
+{
+
+}
+
+void uca_org_t :: cleanup()
+{
+ if (data_array2!=0)
+ delete data_array2;
+ if (tag_array2!=0)
+ delete tag_array2;
+}
diff --git a/ext/mcpat/cacti/cacti_interface.h b/ext/mcpat/cacti/cacti_interface.h
new file mode 100644
index 000000000..f37596554
--- /dev/null
+++ b/ext/mcpat/cacti/cacti_interface.h
@@ -0,0 +1,633 @@
+/*****************************************************************************
+ * McPAT/CACTI
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright 2012 Hewlett-Packard Development Company, L.P.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
+ *
+ ***************************************************************************/
+
+
+
+#ifndef __CACTI_INTERFACE_H__
+#define __CACTI_INTERFACE_H__
+
+#include
+#include
+#include