From 612f8f074fa1099cf70faf495d46cc647762a031 Mon Sep 17 00:00:00 2001 From: ARM gem5 Developers Date: Fri, 24 Jan 2014 15:29:34 -0600 Subject: [PATCH] arm: Add support for ARMv8 (AArch64 & AArch32) Note: AArch64 and AArch32 interworking is not supported. If you use an AArch64 kernel you are restricted to AArch64 user-mode binaries. This will be addressed in a later patch. Note: Virtualization is only supported in AArch32 mode. This will also be fixed in a later patch. Contributors: Giacomo Gabrielli (TrustZone, LPAE, system-level AArch64, AArch64 NEON, validation) Thomas Grocutt (AArch32 Virtualization, AArch64 FP, validation) Mbou Eyole (AArch64 NEON, validation) Ali Saidi (AArch64 Linux support, code integration, validation) Edmund Grimley-Evans (AArch64 FP) William Wang (AArch64 Linux support) Rene De Jong (AArch64 Linux support, performance opt.) Matt Horsnell (AArch64 MP, validation) Matt Evans (device models, code integration, validation) Chris Adeniyi-Jones (AArch64 syscall-emulation) Prakash Ramrakhyani (validation) Dam Sunwoo (validation) Chander Sudanthi (validation) Stephan Diestelhorst (validation) Andreas Hansson (code integration, performance opt.) Eric Van Hensbergen (performance opt.) Gabe Black --- configs/common/FSConfig.py | 3 +- configs/common/O3_ARM_v7a.py | 2 +- configs/common/Options.py | 11 + configs/common/cpu2000.py | 2 +- configs/example/fs.py | 6 + configs/example/se.py | 9 +- ext/libelf/elf_common.h | 1 + src/arch/arm/ArmISA.py | 58 +- src/arch/arm/ArmSystem.py | 28 +- src/arch/arm/ArmTLB.py | 29 +- src/arch/arm/SConscript | 9 +- src/arch/arm/decoder.cc | 21 +- src/arch/arm/decoder.hh | 12 + src/arch/arm/faults.cc | 1292 +++++++- src/arch/arm/faults.hh | 451 ++- src/arch/arm/insts/branch64.cc | 146 + src/arch/arm/insts/branch64.hh | 166 + src/arch/arm/insts/data64.cc | 203 ++ src/arch/arm/insts/data64.hh | 256 ++ src/arch/arm/insts/fplib.cc | 3086 +++++++++++++++++++ src/arch/arm/insts/fplib.hh | 283 ++ src/arch/arm/insts/macromem.cc | 528 +++- src/arch/arm/insts/macromem.hh | 207 +- src/arch/arm/insts/mem.cc | 5 +- src/arch/arm/insts/mem64.cc | 193 ++ src/arch/arm/insts/mem64.hh | 253 ++ src/arch/arm/insts/misc.cc | 38 +- src/arch/arm/insts/misc.hh | 55 +- src/arch/arm/insts/misc64.cc | 73 + src/arch/arm/insts/misc64.hh | 92 + src/arch/arm/insts/neon64_mem.hh | 128 + src/arch/arm/insts/pred_inst.hh | 36 +- src/arch/arm/insts/static_inst.cc | 312 +- src/arch/arm/insts/static_inst.hh | 99 +- src/arch/arm/insts/vfp.cc | 484 +-- src/arch/arm/insts/vfp.hh | 493 ++- src/arch/arm/interrupts.cc | 121 +- src/arch/arm/interrupts.hh | 110 +- src/arch/arm/intregs.hh | 188 +- src/arch/arm/isa.cc | 1629 +++++++++- src/arch/arm/isa.hh | 331 +- src/arch/arm/isa/bitfields.isa | 3 +- src/arch/arm/isa/decoder/aarch64.isa | 48 + src/arch/arm/isa/decoder/arm.isa | 12 +- src/arch/arm/isa/decoder/decoder.isa | 10 +- src/arch/arm/isa/decoder/thumb.isa | 10 +- src/arch/arm/isa/formats/aarch64.isa | 2035 +++++++++++++ src/arch/arm/isa/formats/branch.isa | 68 +- src/arch/arm/isa/formats/formats.isa | 8 +- src/arch/arm/isa/formats/fp.isa | 103 +- src/arch/arm/isa/formats/mem.isa | 2 +- src/arch/arm/isa/formats/misc.isa | 236 +- src/arch/arm/isa/formats/neon64.isa | 2626 ++++++++++++++++ src/arch/arm/isa/formats/uncond.isa | 15 +- src/arch/arm/isa/formats/unimp.isa | 23 +- src/arch/arm/isa/includes.isa | 9 +- src/arch/arm/isa/insts/aarch64.isa | 58 + src/arch/arm/isa/insts/branch.isa | 29 +- src/arch/arm/isa/insts/branch64.isa | 248 ++ src/arch/arm/isa/insts/data.isa | 5 +- 
src/arch/arm/isa/insts/data64.isa | 465 +++ src/arch/arm/isa/insts/div.isa | 12 - src/arch/arm/isa/insts/fp.isa | 154 +- src/arch/arm/isa/insts/fp64.isa | 811 +++++ src/arch/arm/isa/insts/insts.isa | 21 +- src/arch/arm/isa/insts/ldr.isa | 8 +- src/arch/arm/isa/insts/ldr64.isa | 446 +++ src/arch/arm/isa/insts/m5ops.isa | 212 +- src/arch/arm/isa/insts/macromem.isa | 71 +- src/arch/arm/isa/insts/mem.isa | 32 +- src/arch/arm/isa/insts/misc.isa | 444 ++- src/arch/arm/isa/insts/misc64.isa | 147 + src/arch/arm/isa/insts/neon.isa | 569 +++- src/arch/arm/isa/insts/neon64.isa | 3355 +++++++++++++++++++++ src/arch/arm/isa/insts/neon64_mem.isa | 471 +++ src/arch/arm/isa/insts/str.isa | 9 +- src/arch/arm/isa/insts/str64.isa | 372 +++ src/arch/arm/isa/insts/swap.isa | 7 +- src/arch/arm/isa/operands.isa | 175 +- src/arch/arm/isa/templates/basic.isa | 19 + src/arch/arm/isa/templates/branch64.isa | 141 + src/arch/arm/isa/templates/data64.isa | 279 ++ src/arch/arm/isa/templates/macromem.isa | 126 +- src/arch/arm/isa/templates/mem.isa | 22 +- src/arch/arm/isa/templates/mem64.isa | 686 +++++ src/arch/arm/isa/templates/misc.isa | 154 +- src/arch/arm/isa/templates/misc64.isa | 91 + src/arch/arm/isa/templates/neon.isa | 24 +- src/arch/arm/isa/templates/neon64.isa | 527 ++++ src/arch/arm/isa/templates/templates.isa | 13 +- src/arch/arm/isa/templates/vfp.isa | 105 +- src/arch/arm/isa/templates/vfp64.isa | 140 + src/arch/arm/isa_traits.hh | 7 +- src/arch/arm/linux/linux.cc | 121 +- src/arch/arm/linux/linux.hh | 190 +- src/arch/arm/linux/process.cc | 1310 +++++++- src/arch/arm/linux/process.hh | 61 +- src/arch/arm/linux/system.cc | 26 +- src/arch/arm/linux/system.hh | 4 +- src/arch/arm/locked_mem.hh | 37 +- src/arch/arm/miscregs.cc | 3126 ++++++++++++++++++- src/arch/arm/miscregs.hh | 1863 ++++++++++-- src/arch/arm/nativetrace.cc | 4 +- src/arch/arm/pagetable.hh | 220 +- src/arch/arm/process.cc | 134 +- src/arch/arm/process.hh | 41 +- src/arch/arm/registers.hh | 10 +- src/arch/arm/remote_gdb.cc | 183 +- src/arch/arm/remote_gdb.hh | 25 +- src/arch/arm/stage2_lookup.cc | 200 ++ src/arch/arm/stage2_lookup.hh | 108 + src/arch/arm/stage2_mmu.cc | 146 + src/arch/arm/stage2_mmu.hh | 115 + src/arch/arm/system.cc | 143 +- src/arch/arm/system.hh | 167 +- src/arch/arm/table_walker.cc | 1549 ++++++++-- src/arch/arm/table_walker.hh | 687 ++++- src/arch/arm/tlb.cc | 1252 ++++++-- src/arch/arm/tlb.hh | 195 +- src/arch/arm/types.hh | 202 +- src/arch/arm/utility.cc | 865 +++++- src/arch/arm/utility.hh | 173 +- src/arch/arm/vtophys.cc | 71 +- src/base/loader/elf_object.cc | 47 +- src/base/loader/elf_object.hh | 15 +- src/base/loader/object_file.cc | 12 +- src/base/loader/object_file.hh | 7 +- src/cpu/BaseCPU.py | 30 +- src/dev/arm/RealView.py | 43 +- src/dev/arm/SConscript | 5 +- src/dev/arm/generic_timer.cc | 204 ++ src/dev/arm/generic_timer.hh | 199 ++ src/dev/arm/gic_pl390.cc | 3 +- src/dev/arm/vgic.cc | 553 ++++ src/dev/arm/vgic.hh | 262 ++ src/sim/System.py | 3 +- src/sim/process.cc | 13 +- src/sim/serialize.hh | 2 +- src/sim/system.cc | 12 +- src/sim/system.hh | 13 +- system/arm/aarch64_bootloader/LICENSE.txt | 28 + system/arm/aarch64_bootloader/boot.S | 124 + system/arm/aarch64_bootloader/makefile | 4 + util/cpt_upgrader.py | 323 +- util/m5/m5op_arm_A64.S | 149 + 145 files changed, 39812 insertions(+), 2579 deletions(-) create mode 100644 src/arch/arm/insts/branch64.cc create mode 100644 src/arch/arm/insts/branch64.hh create mode 100644 src/arch/arm/insts/data64.cc create mode 100644 src/arch/arm/insts/data64.hh create mode 
100644 src/arch/arm/insts/fplib.cc create mode 100644 src/arch/arm/insts/fplib.hh create mode 100644 src/arch/arm/insts/mem64.cc create mode 100644 src/arch/arm/insts/mem64.hh create mode 100644 src/arch/arm/insts/misc64.cc create mode 100644 src/arch/arm/insts/misc64.hh create mode 100644 src/arch/arm/insts/neon64_mem.hh create mode 100644 src/arch/arm/isa/decoder/aarch64.isa create mode 100644 src/arch/arm/isa/formats/aarch64.isa create mode 100644 src/arch/arm/isa/formats/neon64.isa create mode 100644 src/arch/arm/isa/insts/aarch64.isa create mode 100644 src/arch/arm/isa/insts/branch64.isa create mode 100644 src/arch/arm/isa/insts/data64.isa create mode 100644 src/arch/arm/isa/insts/fp64.isa create mode 100644 src/arch/arm/isa/insts/ldr64.isa create mode 100644 src/arch/arm/isa/insts/misc64.isa create mode 100644 src/arch/arm/isa/insts/neon64.isa create mode 100644 src/arch/arm/isa/insts/neon64_mem.isa create mode 100644 src/arch/arm/isa/insts/str64.isa create mode 100644 src/arch/arm/isa/templates/branch64.isa create mode 100644 src/arch/arm/isa/templates/data64.isa create mode 100644 src/arch/arm/isa/templates/mem64.isa create mode 100644 src/arch/arm/isa/templates/misc64.isa create mode 100644 src/arch/arm/isa/templates/neon64.isa create mode 100644 src/arch/arm/isa/templates/vfp64.isa create mode 100755 src/arch/arm/stage2_lookup.cc create mode 100755 src/arch/arm/stage2_lookup.hh create mode 100755 src/arch/arm/stage2_mmu.cc create mode 100755 src/arch/arm/stage2_mmu.hh create mode 100644 src/dev/arm/generic_timer.cc create mode 100644 src/dev/arm/generic_timer.hh create mode 100644 src/dev/arm/vgic.cc create mode 100644 src/dev/arm/vgic.hh create mode 100644 system/arm/aarch64_bootloader/LICENSE.txt create mode 100644 system/arm/aarch64_bootloader/boot.S create mode 100644 system/arm/aarch64_bootloader/makefile create mode 100644 util/m5/m5op_arm_A64.S diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py index 8905ba2fa..58ad1a7c9 100644 --- a/configs/common/FSConfig.py +++ b/configs/common/FSConfig.py @@ -242,7 +242,8 @@ def makeArmSystem(mem_mode, machine_type, mdesc = None, self.realview = VExpress_ELT() elif machine_type == "VExpress_EMM": self.realview = VExpress_EMM() - self.load_addr_mask = 0xffffffff + elif machine_type == "VExpress_EMM64": + self.realview = VExpress_EMM64() else: print "Unknown Machine Type" sys.exit(1) diff --git a/configs/common/O3_ARM_v7a.py b/configs/common/O3_ARM_v7a.py index aedafaf4d..3b7df9988 100644 --- a/configs/common/O3_ARM_v7a.py +++ b/configs/common/O3_ARM_v7a.py @@ -139,7 +139,7 @@ class O3_ARM_v7a_3(DerivO3CPU): backComSize = 5 forwardComSize = 5 numPhysIntRegs = 128 - numPhysFloatRegs = 128 + numPhysFloatRegs = 192 numIQEntries = 32 numROBEntries = 40 diff --git a/configs/common/Options.py b/configs/common/Options.py index 2cca4ef57..209d24357 100644 --- a/configs/common/Options.py +++ b/configs/common/Options.py @@ -94,6 +94,9 @@ def addCommonOptions(parser): default="512MB", help="Specify the physical memory size (single memory)") + parser.add_option("-l", "--lpae", action="store_true") + parser.add_option("-V", "--virtualisation", action="store_true") + # Cache Options parser.add_option("--caches", action="store_true") parser.add_option("--l2cache", action="store_true") @@ -197,6 +200,14 @@ def addCommonOptions(parser): parser.add_option("--at-instruction", action="store_true", default=False, help="""Treat value of --checkpoint-restore or --take-checkpoint as a number of instructions.""") + 
parser.add_option("--spec-input", default="ref", type="choice", + choices=["ref", "test", "train", "smred", "mdred", + "lgred"], + help="Input set size for SPEC CPU2000 benchmarks.") + parser.add_option("--arm-iset", default="arm", type="choice", + choices=["arm", "thumb", "aarch64"], + help="ARM instruction set.") + def addSEOptions(parser): # Benchmark options diff --git a/configs/common/cpu2000.py b/configs/common/cpu2000.py index 443399234..5f01d28bf 100644 --- a/configs/common/cpu2000.py +++ b/configs/common/cpu2000.py @@ -663,7 +663,7 @@ class vortex(Benchmark): stdin = None def __init__(self, isa, os, input_set): - if (isa == 'alpha' or isa == 'arm'): + if (isa in ('alpha', 'arm', 'thumb', 'aarch64')): self.endian = 'lendian' elif (isa == 'sparc' or isa == 'sparc32'): self.endian = 'bendian' diff --git a/configs/example/fs.py b/configs/example/fs.py index 4cfb3e8e2..cb9b264d2 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -140,6 +140,12 @@ if options.kernel is not None: if options.script is not None: test_sys.readfile = options.script +if options.lpae: + test_sys.have_lpae = True + +if options.virtualisation: + test_sys.have_virtualization = True + test_sys.init_param = options.init_param # For now, assign all the CPUs to the same clock domain diff --git a/configs/example/se.py b/configs/example/se.py index f7e7f1a65..d4f3e2dd9 100644 --- a/configs/example/se.py +++ b/configs/example/se.py @@ -135,9 +135,14 @@ if options.bench: for app in apps: try: if buildEnv['TARGET_ISA'] == 'alpha': - exec("workload = %s('alpha', 'tru64', 'ref')" % app) + exec("workload = %s('alpha', 'tru64', '%s')" % ( + app, options.spec_input)) + elif buildEnv['TARGET_ISA'] == 'arm': + exec("workload = %s('arm_%s', 'linux', '%s')" % ( + app, options.arm_iset, options.spec_input)) else: - exec("workload = %s(buildEnv['TARGET_ISA'], 'linux', 'ref')" % app) + exec("workload = %s(buildEnv['TARGET_ISA', 'linux', '%s')" % ( + app, options.spec_input)) multiprocesses.append(workload.makeLiveProcess()) except: print >>sys.stderr, "Unable to find workload for %s: %s" % (buildEnv['TARGET_ISA'], app) diff --git a/ext/libelf/elf_common.h b/ext/libelf/elf_common.h index c169e7e40..bad988d87 100644 --- a/ext/libelf/elf_common.h +++ b/ext/libelf/elf_common.h @@ -172,6 +172,7 @@ typedef struct { #define EM_TINYJ 61 /* Advanced Logic Corp. TinyJ processor. */ #define EM_X86_64 62 /* Advanced Micro Devices x86-64 */ #define EM_AMD64 EM_X86_64 /* Advanced Micro Devices x86-64 (compat) */ +#define EM_AARCH64 183 /* AArch64 64 bit ARM. */ /* Non-standard or deprecated. */ #define EM_486 6 /* Intel i486. */ diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py index 55a589c32..eaec92f4d 100644 --- a/src/arch/arm/ArmISA.py +++ b/src/arch/arm/ArmISA.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012 ARM Limited +# Copyright (c) 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -34,8 +34,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# # Authors: Andreas Sandberg +# Giacomo Gabrielli from m5.params import * +from m5.proxy import * from m5.SimObject import SimObject class ArmISA(SimObject): @@ -43,12 +45,9 @@ class ArmISA(SimObject): cxx_class = 'ArmISA::ISA' cxx_header = "arch/arm/isa.hh" - # 0x35 Implementor is '5' from "M5" - # 0x0 Variant - # 0xf Architecture from CPUID scheme - # 0xc00 Primary part number ("c" or higher implies ARM v7) - # 0x0 Revision - midr = Param.UInt32(0x350fc000, "Main ID Register") + system = Param.System(Parent.any, "System this ISA object belongs to") + + midr = Param.UInt32(0x410fc0f0, "MIDR value") # See section B4.1.93 - B4.1.94 of the ARM ARM # @@ -56,19 +55,19 @@ class ArmISA(SimObject): # Note: ThumbEE is disabled for now since we don't support CP14 # config registers and jumping to ThumbEE vectors id_pfr0 = Param.UInt32(0x00000031, "Processor Feature Register 0") - # !Timer | !Virti | !M Profile | !TrustZone | ARMv4 - id_pfr1 = Param.UInt32(0x00000001, "Processor Feature Register 1") + # !Timer | Virti | !M Profile | TrustZone | ARMv4 + id_pfr1 = Param.UInt32(0x00001011, "Processor Feature Register 1") # See section B4.1.89 - B4.1.92 of the ARM ARM # VMSAv7 support - id_mmfr0 = Param.UInt32(0x00000003, "Memory Model Feature Register 0") + id_mmfr0 = Param.UInt32(0x10201103, "Memory Model Feature Register 0") id_mmfr1 = Param.UInt32(0x00000000, "Memory Model Feature Register 1") # no HW access | WFI stalling | ISB and DSB | # all TLB maintenance | no Harvard id_mmfr2 = Param.UInt32(0x01230000, "Memory Model Feature Register 2") # SuperSec | Coherent TLB | Bcast Maint | # BP Maint | Cache Maint Set/way | Cache Maint MVA - id_mmfr3 = Param.UInt32(0xF0102211, "Memory Model Feature Register 3") + id_mmfr3 = Param.UInt32(0x02102211, "Memory Model Feature Register 3") # See section B4.1.84 of ARM ARM # All values are latest for ARMv7-A profile @@ -79,5 +78,40 @@ class ArmISA(SimObject): id_isar4 = Param.UInt32(0x10010142, "Instruction Set Attribute Register 4") id_isar5 = Param.UInt32(0x00000000, "Instruction Set Attribute Register 5") + fpsid = Param.UInt32(0x410430a0, "Floating-point System ID Register") - fpsid = Param.UInt32(0x410430A0, "Floating-point System ID Register") + # [31:0] is implementation defined + id_aa64afr0_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Auxiliary Feature Register 0") + # Reserved for future expansion + id_aa64afr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Auxiliary Feature Register 1") + + # 1 CTX CMPs | 2 WRPs | 2 BRPs | !PMU | !Trace | Debug v8-A + id_aa64dfr0_el1 = Param.UInt64(0x0000000000101006, + "AArch64 Debug Feature Register 0") + # Reserved for future expansion + id_aa64dfr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Debug Feature Register 1") + + # !CRC32 | !SHA2 | !SHA1 | !AES + id_aa64isar0_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Instruction Set Attribute Register 0") + # Reserved for future expansion + id_aa64isar1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Instruction Set Attribute Register 1") + + # 4K | 64K | !16K | !BigEndEL0 | !SNSMem | !BigEnd | 8b ASID | 40b PA + id_aa64mmfr0_el1 = Param.UInt64(0x0000000000f00002, + "AArch64 Memory Model Feature Register 0") + # Reserved for future expansion + id_aa64mmfr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Memory Model Feature Register 1") + + # !GICv3 CP15 | AdvSIMD | FP | !EL3 | !EL2 | EL1 (AArch64) | EL0 (AArch64) + # (no AArch32/64 interprocessing support for now) + id_aa64pfr0_el1 = Param.UInt64(0x0000000000000011, + "AArch64 Processor Feature 
Register 0") + # Reserved for future expansion + id_aa64pfr1_el1 = Param.UInt64(0x0000000000000000, + "AArch64 Processor Feature Register 1") diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py index b48c2a29d..39b7ec8ff 100644 --- a/src/arch/arm/ArmSystem.py +++ b/src/arch/arm/ArmSystem.py @@ -1,4 +1,4 @@ -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -44,7 +44,8 @@ class ArmMachineType(Enum): 'RealView_PBX' : 1901, 'VExpress_ELT' : 2272, 'VExpress_CA9' : 2272, - 'VExpress_EMM' : 2272} + 'VExpress_EMM' : 2272, + 'VExpress_EMM64' : 2272} class ArmSystem(System): type = 'ArmSystem' @@ -54,6 +55,23 @@ class ArmSystem(System): boot_loader = Param.String("", "File that contains the boot loader code if any") gic_cpu_addr = Param.Addr(0, "Addres of the GIC CPU interface") flags_addr = Param.Addr(0, "Address of the flags register for MP booting") + have_security = Param.Bool(False, + "True if Security Extensions are implemented") + have_virtualization = Param.Bool(False, + "True if Virtualization Extensions are implemented") + have_lpae = Param.Bool(False, "True if LPAE is implemented") + have_generic_timer = Param.Bool(False, + "True if the Generic Timer extension is implemented") + highest_el_is_64 = Param.Bool(False, + "True if the register width of the highest implemented exception level " + "is 64 bits (ARMv8)") + reset_addr_64 = Param.UInt64(0x0, + "Reset address if the highest implemented exception level is 64 bits " + "(ARMv8)") + phys_addr_range_64 = Param.UInt8(40, + "Supported physical address range in bits when using AArch64 (ARMv8)") + have_large_asid_64 = Param.Bool(False, + "True if ASID is 16 bits in AArch64 (ARMv8)") class LinuxArmSystem(ArmSystem): type = 'LinuxArmSystem' @@ -61,8 +79,10 @@ class LinuxArmSystem(ArmSystem): load_addr_mask = 0x0fffffff machine_type = Param.ArmMachineType('RealView_PBX', "Machine id from http://www.arm.linux.org.uk/developer/machines/") - atags_addr = Param.Addr(0x100, - "Address where default atags structure should be written") + atags_addr = Param.Addr("Address where default atags structure should " \ + "be written") + boot_release_addr = Param.Addr(0xfff8, "Address where secondary CPUs " \ + "spin waiting boot in the loader") dtb_filename = Param.String("", "File that contains the Device Tree Blob. Don't use DTB if empty.") early_kernel_symbols = Param.Bool(False, diff --git a/src/arch/arm/ArmTLB.py b/src/arch/arm/ArmTLB.py index c70dd80c8..01ac8016a 100644 --- a/src/arch/arm/ArmTLB.py +++ b/src/arch/arm/ArmTLB.py @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2013 ARM Limited # All rights reserved. 
# # The license below extends only to copyright in the software and shall @@ -42,10 +42,12 @@ from m5.params import * from m5.proxy import * from MemObject import MemObject +# Basic stage 1 translation objects class ArmTableWalker(MemObject): type = 'ArmTableWalker' cxx_class = 'ArmISA::TableWalker' cxx_header = "arch/arm/table_walker.hh" + is_stage2 = Param.Bool(False, "Is this object for stage 2 translation?") port = MasterPort("Port for TableWalker to do walk the translation with") sys = Param.System(Parent.any, "system object parameter") num_squash_per_cycle = Param.Unsigned(2, @@ -57,3 +59,28 @@ class ArmTLB(SimObject): cxx_header = "arch/arm/tlb.hh" size = Param.Int(64, "TLB size") walker = Param.ArmTableWalker(ArmTableWalker(), "HW Table walker") + is_stage2 = Param.Bool(False, "Is this a stage 2 TLB?") + +# Stage 2 translation objects, only used when virtualisation is being used +class ArmStage2TableWalker(ArmTableWalker): + is_stage2 = True + +class ArmStage2TLB(ArmTLB): + size = 32 + walker = ArmStage2TableWalker() + is_stage2 = True + +class ArmStage2MMU(SimObject): + type = 'ArmStage2MMU' + cxx_class = 'ArmISA::Stage2MMU' + cxx_header = 'arch/arm/stage2_mmu.hh' + tlb = Param.ArmTLB("Stage 1 TLB") + stage2_tlb = Param.ArmTLB("Stage 2 TLB") + +class ArmStage2IMMU(ArmStage2MMU): + tlb = Parent.itb + stage2_tlb = ArmStage2TLB(walker = ArmStage2TableWalker()) + +class ArmStage2DMMU(ArmStage2MMU): + tlb = Parent.dtb + stage2_tlb = ArmStage2TLB(walker = ArmStage2TableWalker()) diff --git a/src/arch/arm/SConscript b/src/arch/arm/SConscript index 8d13a9b2d..aa9ce417b 100644 --- a/src/arch/arm/SConscript +++ b/src/arch/arm/SConscript @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2009 ARM Limited +# Copyright (c) 2009, 2012-2013 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -49,12 +49,17 @@ if env['TARGET_ISA'] == 'arm': Dir('isa/formats') Source('decoder.cc') Source('faults.cc') + Source('insts/branch64.cc') + Source('insts/data64.cc') Source('insts/macromem.cc') Source('insts/mem.cc') + Source('insts/mem64.cc') Source('insts/misc.cc') + Source('insts/misc64.cc') Source('insts/pred_inst.cc') Source('insts/static_inst.cc') Source('insts/vfp.cc') + Source('insts/fplib.cc') Source('interrupts.cc') Source('isa.cc') Source('linux/linux.cc') @@ -67,6 +72,8 @@ if env['TARGET_ISA'] == 'arm': Source('stacktrace.cc') Source('system.cc') Source('table_walker.cc') + Source('stage2_mmu.cc') + Source('stage2_lookup.cc') Source('tlb.cc') Source('utility.cc') Source('vtophys.cc') diff --git a/src/arch/arm/decoder.cc b/src/arch/arm/decoder.cc index e957ce0e7..940d85b8e 100644 --- a/src/arch/arm/decoder.cc +++ b/src/arch/arm/decoder.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2012 Google * All rights reserved. 
* @@ -47,9 +59,11 @@ Decoder::process() if (!emi.thumb) { emi.instBits = data; - emi.sevenAndFour = bits(data, 7) && bits(data, 4); - emi.isMisc = (bits(data, 24, 23) == 0x2 && - bits(data, 20) == 0); + if (!emi.aarch64) { + emi.sevenAndFour = bits(data, 7) && bits(data, 4); + emi.isMisc = (bits(data, 24, 23) == 0x2 && + bits(data, 20) == 0); + } consumeBytes(4); DPRINTF(Decoder, "Arm inst: %#x.\n", (uint64_t)emi); } else { @@ -112,6 +126,7 @@ Decoder::moreBytes(const PCState &pc, Addr fetchPC, MachInst inst) data = inst; offset = (fetchPC >= pc.instAddr()) ? 0 : pc.instAddr() - fetchPC; emi.thumb = pc.thumb(); + emi.aarch64 = pc.aarch64(); emi.fpscrLen = fpscrLen; emi.fpscrStride = fpscrStride; diff --git a/src/arch/arm/decoder.hh b/src/arch/arm/decoder.hh index 72776bcfd..315a3b6ad 100644 --- a/src/arch/arm/decoder.hh +++ b/src/arch/arm/decoder.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2012 Google * All rights reserved. * diff --git a/src/arch/arm/faults.cc b/src/arch/arm/faults.cc index be1c7ecc2..f8313efd2 100644 --- a/src/arch/arm/faults.cc +++ b/src/arch/arm/faults.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -40,9 +40,15 @@ * * Authors: Ali Saidi * Gabe Black + * Giacomo Gabrielli + * Thomas Grocutt */ #include "arch/arm/faults.hh" +#include "arch/arm/system.hh" +#include "arch/arm/utility.hh" +#include "arch/arm/insts/static_inst.hh" +#include "base/compiler.hh" #include "base/trace.hh" #include "cpu/base.hh" #include "cpu/thread_context.hh" @@ -52,61 +58,413 @@ namespace ArmISA { -template<> ArmFault::FaultVals ArmFaultVals<Reset>::vals = -{"reset", 0x00, MODE_SVC, 0, 0, true, true, FaultStat()}; +uint8_t ArmFault::shortDescFaultSources[] = { + 0x01, // AlignmentFault + 0x04, // InstructionCacheMaintenance + 0xff, // SynchExtAbtOnTranslTableWalkL0 (INVALID) + 0x0c, // SynchExtAbtOnTranslTableWalkL1 + 0x0e, // SynchExtAbtOnTranslTableWalkL2 + 0xff, // SynchExtAbtOnTranslTableWalkL3 (INVALID) + 0xff, // SynchPtyErrOnTranslTableWalkL0 (INVALID) + 0x1c, // SynchPtyErrOnTranslTableWalkL1 + 0x1e, // SynchPtyErrOnTranslTableWalkL2 + 0xff, // SynchPtyErrOnTranslTableWalkL3 (INVALID) + 0xff, // TranslationL0 (INVALID) + 0x05, // TranslationL1 + 0x07, // TranslationL2 + 0xff, // TranslationL3 (INVALID) + 0xff, // AccessFlagL0 (INVALID) + 0x03, // AccessFlagL1 + 0x06, // AccessFlagL2 + 0xff, // AccessFlagL3 (INVALID) + 0xff, // DomainL0 (INVALID) + 0x09, // DomainL1 + 0x0b, // DomainL2 + 0xff, // DomainL3 (INVALID) + 0xff, // PermissionL0 (INVALID) + 0x0d, // PermissionL1 + 0x0f, // PermissionL2 + 0xff, // PermissionL3 (INVALID) + 0x02, // DebugEvent + 0x08, // SynchronousExternalAbort + 0x10, // TLBConflictAbort + 0x19, // SynchPtyErrOnMemoryAccess + 0x16, // AsynchronousExternalAbort + 0x18, // AsynchPtyErrOnMemoryAccess +
0xff, // AddressSizeL0 (INVALID) + 0xff, // AddressSizeL1 (INVALID) + 0xff, // AddressSizeL2 (INVALID) + 0xff, // AddressSizeL3 (INVALID) + 0x40, // PrefetchTLBMiss + 0x80 // PrefetchUncacheable +}; -template<> ArmFault::FaultVals ArmFaultVals<UndefinedInstruction>::vals = -{"Undefined Instruction", 0x04, MODE_UNDEFINED, 4 ,2, false, false, - FaultStat()} ; +static_assert(sizeof(ArmFault::shortDescFaultSources) == + ArmFault::NumFaultSources, + "Invalid size of ArmFault::shortDescFaultSources[]"); -template<> ArmFault::FaultVals ArmFaultVals<SupervisorCall>::vals = -{"Supervisor Call", 0x08, MODE_SVC, 4, 2, false, false, FaultStat()}; +uint8_t ArmFault::longDescFaultSources[] = { + 0x21, // AlignmentFault + 0xff, // InstructionCacheMaintenance (INVALID) + 0xff, // SynchExtAbtOnTranslTableWalkL0 (INVALID) + 0x15, // SynchExtAbtOnTranslTableWalkL1 + 0x16, // SynchExtAbtOnTranslTableWalkL2 + 0x17, // SynchExtAbtOnTranslTableWalkL3 + 0xff, // SynchPtyErrOnTranslTableWalkL0 (INVALID) + 0x1d, // SynchPtyErrOnTranslTableWalkL1 + 0x1e, // SynchPtyErrOnTranslTableWalkL2 + 0x1f, // SynchPtyErrOnTranslTableWalkL3 + 0xff, // TranslationL0 (INVALID) + 0x05, // TranslationL1 + 0x06, // TranslationL2 + 0x07, // TranslationL3 + 0xff, // AccessFlagL0 (INVALID) + 0x09, // AccessFlagL1 + 0x0a, // AccessFlagL2 + 0x0b, // AccessFlagL3 + 0xff, // DomainL0 (INVALID) + 0x3d, // DomainL1 + 0x3e, // DomainL2 + 0xff, // DomainL3 (RESERVED) + 0xff, // PermissionL0 (INVALID) + 0x0d, // PermissionL1 + 0x0e, // PermissionL2 + 0x0f, // PermissionL3 + 0x22, // DebugEvent + 0x10, // SynchronousExternalAbort + 0x30, // TLBConflictAbort + 0x18, // SynchPtyErrOnMemoryAccess + 0x11, // AsynchronousExternalAbort + 0x19, // AsynchPtyErrOnMemoryAccess + 0xff, // AddressSizeL0 (INVALID) + 0xff, // AddressSizeL1 (INVALID) + 0xff, // AddressSizeL2 (INVALID) + 0xff, // AddressSizeL3 (INVALID) + 0x40, // PrefetchTLBMiss + 0x80 // PrefetchUncacheable +}; -template<> ArmFault::FaultVals ArmFaultVals<PrefetchAbort>::vals = -{"Prefetch Abort", 0x0C, MODE_ABORT, 4, 4, true, false, FaultStat()}; +static_assert(sizeof(ArmFault::longDescFaultSources) == + ArmFault::NumFaultSources, + "Invalid size of ArmFault::longDescFaultSources[]"); -template<> ArmFault::FaultVals ArmFaultVals<DataAbort>::vals = -{"Data Abort", 0x10, MODE_ABORT, 8, 8, true, false, FaultStat()}; +uint8_t ArmFault::aarch64FaultSources[] = { + 0x21, // AlignmentFault + 0xff, // InstructionCacheMaintenance (INVALID) + 0x14, // SynchExtAbtOnTranslTableWalkL0 + 0x15, // SynchExtAbtOnTranslTableWalkL1 + 0x16, // SynchExtAbtOnTranslTableWalkL2 + 0x17, // SynchExtAbtOnTranslTableWalkL3 + 0x1c, // SynchPtyErrOnTranslTableWalkL0 + 0x1d, // SynchPtyErrOnTranslTableWalkL1 + 0x1e, // SynchPtyErrOnTranslTableWalkL2 + 0x1f, // SynchPtyErrOnTranslTableWalkL3 + 0x04, // TranslationL0 + 0x05, // TranslationL1 + 0x06, // TranslationL2 + 0x07, // TranslationL3 + 0x08, // AccessFlagL0 + 0x09, // AccessFlagL1 + 0x0a, // AccessFlagL2 + 0x0b, // AccessFlagL3 + // @todo: Section & Page Domain Fault in AArch64?
+ 0xff, // DomainL0 (INVALID) + 0xff, // DomainL1 (INVALID) + 0xff, // DomainL2 (INVALID) + 0xff, // DomainL3 (INVALID) + 0x0c, // PermissionL0 + 0x0d, // PermissionL1 + 0x0e, // PermissionL2 + 0x0f, // PermissionL3 + 0xff, // DebugEvent (INVALID) + 0x10, // SynchronousExternalAbort + 0x30, // TLBConflictAbort + 0x18, // SynchPtyErrOnMemoryAccess + 0xff, // AsynchronousExternalAbort (INVALID) + 0xff, // AsynchPtyErrOnMemoryAccess (INVALID) + 0x00, // AddressSizeL0 + 0x01, // AddressSizeL1 + 0x02, // AddressSizeL2 + 0x03, // AddressSizeL3 + 0x40, // PrefetchTLBMiss + 0x80 // PrefetchUncacheable +}; -template<> ArmFault::FaultVals ArmFaultVals<Interrupt>::vals = -{"IRQ", 0x18, MODE_IRQ, 4, 4, true, false, FaultStat()}; +static_assert(sizeof(ArmFault::aarch64FaultSources) == + ArmFault::NumFaultSources, + "Invalid size of ArmFault::aarch64FaultSources[]"); -template<> ArmFault::FaultVals ArmFaultVals<FastInterrupt>::vals = -{"FIQ", 0x1C, MODE_FIQ, 4, 4, true, true, FaultStat()}; +// Fields: name, offset, cur{ELT,ELH}Offset, lowerEL{64,32}Offset, next mode, +// {ARM, Thumb, ARM_ELR, Thumb_ELR} PC offset, hyp trap, +// {A, F} disable, class, stat template<> ArmFault::FaultVals ArmFaultVals<Reset>::vals = { + // Some dummy values (the reset vector has an IMPLEMENTATION DEFINED + // location in AArch64) + "Reset", 0x000, 0x000, 0x000, 0x000, 0x000, MODE_SVC, + 0, 0, 0, 0, false, true, true, EC_UNKNOWN, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<UndefinedInstruction>::vals = { + "Undefined Instruction", 0x004, 0x000, 0x200, 0x400, 0x600, MODE_UNDEFINED, + 4, 2, 0, 0, true, false, false, EC_UNKNOWN, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<SupervisorCall>::vals = { + "Supervisor Call", 0x008, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 4, 2, 4, 2, true, false, false, EC_SVC_TO_HYP, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<SecureMonitorCall>::vals = { + "Secure Monitor Call", 0x008, 0x000, 0x200, 0x400, 0x600, MODE_MON, + 4, 4, 4, 4, false, true, true, EC_SMC_TO_HYP, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<HypervisorCall>::vals = { + "Hypervisor Call", 0x008, 0x000, 0x200, 0x400, 0x600, MODE_HYP, + 4, 4, 4, 4, true, false, false, EC_HVC, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<PrefetchAbort>::vals = { + "Prefetch Abort", 0x00C, 0x000, 0x200, 0x400, 0x600, MODE_ABORT, + 4, 4, 0, 0, true, true, false, EC_PREFETCH_ABORT_TO_HYP, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<DataAbort>::vals = { + "Data Abort", 0x010, 0x000, 0x200, 0x400, 0x600, MODE_ABORT, + 8, 8, 0, 0, true, true, false, EC_DATA_ABORT_TO_HYP, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<VirtualDataAbort>::vals = { + "Virtual Data Abort", 0x010, 0x000, 0x200, 0x400, 0x600, MODE_ABORT, + 8, 8, 0, 0, true, true, false, EC_INVALID, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<HypervisorTrap>::vals = { + // @todo: double check these values + "Hypervisor Trap", 0x014, 0x000, 0x200, 0x400, 0x600, MODE_HYP, + 0, 0, 0, 0, false, false, false, EC_UNKNOWN, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<Interrupt>::vals = { + "IRQ", 0x018, 0x080, 0x280, 0x480, 0x680, MODE_IRQ, + 4, 4, 0, 0, false, true, false, EC_UNKNOWN, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<VirtualInterrupt>::vals = { + "Virtual IRQ", 0x018, 0x080, 0x280, 0x480, 0x680, MODE_IRQ, + 4, 4, 0, 0, false, true, false, EC_INVALID, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<FastInterrupt>::vals = { + "FIQ", 0x01C, 0x100, 0x300, 0x500, 0x700, MODE_FIQ, + 4, 4, 0, 0, false, true, true, EC_UNKNOWN, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<VirtualFastInterrupt>::vals = { + "Virtual FIQ", 0x01C, 0x100,
0x300, 0x500, 0x700, MODE_FIQ, + 4, 4, 0, 0, false, true, true, EC_INVALID, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<SupervisorTrap>::vals = { + // Some dummy values (SupervisorTrap is AArch64-only) + "Supervisor Trap", 0x014, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 0, 0, 0, 0, false, false, false, EC_UNKNOWN, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<SecureMonitorTrap>::vals = { + // Some dummy values (SecureMonitorTrap is AArch64-only) + "Secure Monitor Trap", 0x014, 0x000, 0x200, 0x400, 0x600, MODE_MON, + 0, 0, 0, 0, false, false, false, EC_UNKNOWN, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<PCAlignmentFault>::vals = { + // Some dummy values (PCAlignmentFault is AArch64-only) + "PC Alignment Fault", 0x000, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 0, 0, 0, 0, true, false, false, EC_PC_ALIGNMENT, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<SPAlignmentFault>::vals = { + // Some dummy values (SPAlignmentFault is AArch64-only) + "SP Alignment Fault", 0x000, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 0, 0, 0, 0, true, false, false, EC_STACK_PTR_ALIGNMENT, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<SystemError>::vals = { + // Some dummy values (SError is AArch64-only) + "SError", 0x000, 0x180, 0x380, 0x580, 0x780, MODE_SVC, + 0, 0, 0, 0, false, true, true, EC_SERROR, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<FlushPipe>::vals = { + // Some dummy values + "Pipe Flush", 0x000, 0x000, 0x000, 0x000, 0x000, MODE_SVC, + 0, 0, 0, 0, false, true, true, EC_UNKNOWN, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<ArmSev>::vals = { + // Some dummy values + "ArmSev Flush", 0x000, 0x000, 0x000, 0x000, 0x000, MODE_SVC, + 0, 0, 0, 0, false, true, true, EC_UNKNOWN, FaultStat() +}; template<> ArmFault::FaultVals ArmFaultVals<IllegalInstSetStateFault>::vals = { + // Some dummy values (IllegalInstSetStateFault is AArch64-only) + "Illegal Inst Set State Fault", 0x000, 0x000, 0x200, 0x400, 0x600, MODE_SVC, + 0, 0, 0, 0, true, false, false, EC_ILLEGAL_INST, FaultStat() +}; -template<> ArmFault::FaultVals ArmFaultVals<FlushPipe>::vals = -{"Pipe Flush", 0x00, MODE_SVC, 0, 0, true, true, FaultStat()}; // dummy values - -template<> ArmFault::FaultVals ArmFaultVals<ArmSev>::vals = -{"ArmSev Flush", 0x00, MODE_SVC, 0, 0, true, true, FaultStat()}; // dummy values
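Aside: the three arrays above map each ArmFault::FaultSource to its encoding in the short-descriptor FSR, the long-descriptor FSR and the AArch64 fault-status field, with 0xff marking sources that are invalid for that format. A minimal standalone sketch of the intended lookup pattern follows; it is illustrative only (the two-entry tables and the encodeFaultSource helper are not part of the patch):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    enum FaultSource { AlignmentFault = 0, InstructionCacheMaintenance = 1 };
    const uint8_t FaultSourceInvalid = 0xff;

    // Two-entry excerpts of the per-format tables above.
    const uint8_t shortDescSources[] = { 0x01, 0x04 };
    const uint8_t longDescSources[]  = { 0x21, FaultSourceInvalid };

    // Select the encoding for the active translation format; an invalid
    // entry means the source cannot be reported in that format.
    uint8_t encodeFaultSource(FaultSource src, bool longDesc)
    {
        uint8_t enc = longDesc ? longDescSources[src] : shortDescSources[src];
        assert(enc != FaultSourceInvalid);
        return enc;
    }

    int main()
    {
        // An alignment fault encodes as 0x01 (short) or 0x21 (long/LPAE).
        std::printf("%#x %#x\n", encodeFaultSource(AlignmentFault, false),
                    encodeFaultSource(AlignmentFault, true));
        return 0;
    }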
tc->readMiscReg(MISCREG_VBAR) : 0; + } + break; + } + return base + offset(tc); } -void +Addr +ArmFault::getVector64(ThreadContext *tc) +{ + Addr vbar; + switch (toEL) { + case EL3: + assert(ArmSystem::haveSecurity(tc)); + vbar = tc->readMiscReg(MISCREG_VBAR_EL3); + break; + // @todo: uncomment this to enable Virtualization + // case EL2: + // assert(ArmSystem::haveVirtualization(tc)); + // vbar = tc->readMiscReg(MISCREG_VBAR_EL2); + // break; + case EL1: + vbar = tc->readMiscReg(MISCREG_VBAR_EL1); + break; + default: + panic("Invalid target exception level"); + break; + } + return vbar + offset64(); +} + +MiscRegIndex +ArmFault::getSyndromeReg64() const +{ + switch (toEL) { + case EL1: + return MISCREG_ESR_EL1; + case EL2: + return MISCREG_ESR_EL2; + case EL3: + return MISCREG_ESR_EL3; + default: + panic("Invalid exception level"); + break; + } +} + +MiscRegIndex +ArmFault::getFaultAddrReg64() const +{ + switch (toEL) { + case EL1: + return MISCREG_FAR_EL1; + case EL2: + return MISCREG_FAR_EL2; + case EL3: + return MISCREG_FAR_EL3; + default: + panic("Invalid exception level"); + break; + } +} + +void +ArmFault::setSyndrome(ThreadContext *tc, MiscRegIndex syndrome_reg) +{ + uint32_t value; + uint32_t exc_class = (uint32_t) ec(tc); + uint32_t issVal = iss(); + assert(!from64 || ArmSystem::highestELIs64(tc)); + + value = exc_class << 26; + + // HSR.IL not valid for Prefetch Aborts (0x20, 0x21) and Data Aborts (0x24, + // 0x25) for which the ISS information is not valid (ARMv7). + // @todo: ARMv8 revises AArch32 functionality: when HSR.IL is not + // valid it is treated as RES1. + if (to64) { + value |= 1 << 25; + } else if ((bits(exc_class, 5, 3) != 4) || + (bits(exc_class, 2) && bits(issVal, 24))) { + if (!machInst.thumb || machInst.bigThumb) + value |= 1 << 25; + } + // Condition code valid for EC[5:4] nonzero + if (!from64 && ((bits(exc_class, 5, 4) == 0) && + (bits(exc_class, 3, 0) != 0))) { + if (!machInst.thumb) { + uint32_t cond; + ConditionCode condCode = (ConditionCode) (uint32_t) machInst.condCode; + // If its on unconditional instruction report with a cond code of + // 0xE, ie the unconditional code + cond = (condCode == COND_UC) ? 
COND_AL : condCode; + value |= cond << 20; + value |= 1 << 24; + } + value |= bits(issVal, 19, 0); + } else { + value |= issVal; + } + tc->setMiscReg(syndrome_reg, value); +} + +void ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) { - // ARM ARM B1.6.3 + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + + if (ArmSystem::highestELIs64(tc)) { // ARMv8 + // Determine source exception level and mode + fromMode = (OperatingMode) (uint8_t) cpsr.mode; + fromEL = opModeToEL(fromMode); + if (opModeIs64(fromMode)) + from64 = true; + + // Determine target exception level + if (ArmSystem::haveSecurity(tc) && routeToMonitor(tc)) + toEL = EL3; + else + toEL = opModeToEL(nextMode()); + if (fromEL > toEL) + toEL = fromEL; + + if (toEL == ArmSystem::highestEL(tc) || ELIs64(tc, toEL)) { + // Invoke exception handler in AArch64 state + to64 = true; + invoke64(tc, inst); + return; + } + } + + // ARMv7 (ARM ARM issue C B1.9) + + bool have_security = ArmSystem::haveSecurity(tc); + bool have_virtualization = ArmSystem::haveVirtualization(tc); + FaultBase::invoke(tc); if (!FullSystem) return; countStat()++; SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR); - CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr = tc->readMiscReg(MISCREG_SCR); CPSR saved_cpsr = tc->readMiscReg(MISCREG_CPSR); saved_cpsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); saved_cpsr.c = tc->readIntReg(INTREG_CONDCODES_C); @@ -118,22 +476,73 @@ ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) saved_cpsr.it2 = it.top6; saved_cpsr.it1 = it.bottom2; - cpsr.mode = nextMode(); + // if we have a valid instruction then use it to annotate this fault with + // extra information. This is used to generate the correct fault syndrome + // information + if (inst) { + ArmStaticInst *armInst = reinterpret_cast(inst.get()); + armInst->annotateFault(this); + } + + if (have_security && routeToMonitor(tc)) + cpsr.mode = MODE_MON; + else if (have_virtualization && routeToHyp(tc)) + cpsr.mode = MODE_HYP; + else + cpsr.mode = nextMode(); + + // Ensure Secure state if initially in Monitor mode + if (have_security && saved_cpsr.mode == MODE_MON) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + if (scr.ns) { + scr.ns = 0; + tc->setMiscRegNoEffect(MISCREG_SCR, scr); + } + } + + // some bits are set differently if we have been routed to hyp mode + if (cpsr.mode == MODE_HYP) { + SCTLR hsctlr = tc->readMiscReg(MISCREG_HSCTLR); + cpsr.t = hsctlr.te; + cpsr.e = hsctlr.ee; + if (!scr.ea) {cpsr.a = 1;} + if (!scr.fiq) {cpsr.f = 1;} + if (!scr.irq) {cpsr.i = 1;} + } else if (cpsr.mode == MODE_MON) { + // Special case handling when entering monitor mode + cpsr.t = sctlr.te; + cpsr.e = sctlr.ee; + cpsr.a = 1; + cpsr.f = 1; + cpsr.i = 1; + } else { + cpsr.t = sctlr.te; + cpsr.e = sctlr.ee; + + // The *Disable functions are virtual and different per fault + cpsr.a = cpsr.a | abortDisable(tc); + cpsr.f = cpsr.f | fiqDisable(tc); + cpsr.i = 1; + } cpsr.it1 = cpsr.it2 = 0; cpsr.j = 0; - - cpsr.t = sctlr.te; - cpsr.a = cpsr.a | abortDisable(); - cpsr.f = cpsr.f | fiqDisable(); - cpsr.i = 1; - cpsr.e = sctlr.ee; tc->setMiscReg(MISCREG_CPSR, cpsr); + // Make sure mailbox sets to one always tc->setMiscReg(MISCREG_SEV_MAILBOX, 1); - tc->setIntReg(INTREG_LR, curPc + - (saved_cpsr.t ? thumbPcOffset() : armPcOffset())); - switch (nextMode()) { + // Clear the exclusive monitor + tc->setMiscReg(MISCREG_LOCKFLAG, 0); + + if (cpsr.mode == MODE_HYP) { + tc->setMiscReg(MISCREG_ELR_HYP, curPc + + (saved_cpsr.t ? 
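Aside: setSyndrome() above packs the exception class (EC) into bits [31:26] of the syndrome register, the instruction-length (IL) bit into bit 25, and the ISS into the remaining low bits. A worked toy version of that packing (standalone and illustrative; the EC value 0x15 for an SVC taken from AArch64 is the architectural encoding, not something defined in this patch):

    #include <cstdint>
    #include <cstdio>

    // Assemble a syndrome word as setSyndrome() does: EC in [31:26],
    // IL in [25], ISS below.
    uint32_t packSyndrome(uint32_t ec, bool is32bit, uint32_t iss)
    {
        uint32_t value = ec << 26;          // exception class
        if (is32bit)
            value |= 1u << 25;              // IL: 32-bit instruction
        value |= iss & 0x1ffffff;           // ISS payload
        return value;
    }

    int main()
    {
        // SVC #0 taken from AArch64: EC = 0x15, IL = 1, ISS = 0,
        // giving ESR = 0x56000000.
        std::printf("ESR = %#010x\n", packSyndrome(0x15, true, 0));
        return 0;
    }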
+ +void ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) { - // ARM ARM B1.6.3 + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + + if (ArmSystem::highestELIs64(tc)) { // ARMv8 + // Determine source exception level and mode + fromMode = (OperatingMode) (uint8_t) cpsr.mode; + fromEL = opModeToEL(fromMode); + if (opModeIs64(fromMode)) + from64 = true; + + // Determine target exception level + if (ArmSystem::haveSecurity(tc) && routeToMonitor(tc)) + toEL = EL3; + else + toEL = opModeToEL(nextMode()); + if (fromEL > toEL) + toEL = fromEL; + + if (toEL == ArmSystem::highestEL(tc) || ELIs64(tc, toEL)) { + // Invoke exception handler in AArch64 state + to64 = true; + invoke64(tc, inst); + return; + } + } + + // ARMv7 (ARM ARM issue C B1.9) + + bool have_security = ArmSystem::haveSecurity(tc); + bool have_virtualization = ArmSystem::haveVirtualization(tc); + FaultBase::invoke(tc); if (!FullSystem) return; countStat()++; SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR); - CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr = tc->readMiscReg(MISCREG_SCR); CPSR saved_cpsr = tc->readMiscReg(MISCREG_CPSR); saved_cpsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); saved_cpsr.c = tc->readIntReg(INTREG_CONDCODES_C); @@ -118,22 +476,73 @@ ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) saved_cpsr.it2 = it.top6; saved_cpsr.it1 = it.bottom2; - cpsr.mode = nextMode(); + // if we have a valid instruction then use it to annotate this fault with + // extra information. This is used to generate the correct fault syndrome + // information + if (inst) { + ArmStaticInst *armInst = reinterpret_cast<ArmStaticInst *>(inst.get()); + armInst->annotateFault(this); + } + + if (have_security && routeToMonitor(tc)) + cpsr.mode = MODE_MON; + else if (have_virtualization && routeToHyp(tc)) + cpsr.mode = MODE_HYP; + else + cpsr.mode = nextMode(); + + // Ensure Secure state if initially in Monitor mode + if (have_security && saved_cpsr.mode == MODE_MON) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + if (scr.ns) { + scr.ns = 0; + tc->setMiscRegNoEffect(MISCREG_SCR, scr); + } + } + + // some bits are set differently if we have been routed to hyp mode + if (cpsr.mode == MODE_HYP) { + SCTLR hsctlr = tc->readMiscReg(MISCREG_HSCTLR); + cpsr.t = hsctlr.te; + cpsr.e = hsctlr.ee; + if (!scr.ea) {cpsr.a = 1;} + if (!scr.fiq) {cpsr.f = 1;} + if (!scr.irq) {cpsr.i = 1;} + } else if (cpsr.mode == MODE_MON) { + // Special case handling when entering monitor mode + cpsr.t = sctlr.te; + cpsr.e = sctlr.ee; + cpsr.a = 1; + cpsr.f = 1; + cpsr.i = 1; + } else { + cpsr.t = sctlr.te; + cpsr.e = sctlr.ee; + + // The *Disable functions are virtual and different per fault + cpsr.a = cpsr.a | abortDisable(tc); + cpsr.f = cpsr.f | fiqDisable(tc); + cpsr.i = 1; + } cpsr.it1 = cpsr.it2 = 0; cpsr.j = 0; - - cpsr.t = sctlr.te; - cpsr.a = cpsr.a | abortDisable(); - cpsr.f = cpsr.f | fiqDisable(); - cpsr.i = 1; - cpsr.e = sctlr.ee; tc->setMiscReg(MISCREG_CPSR, cpsr); + // Make sure mailbox sets to one always tc->setMiscReg(MISCREG_SEV_MAILBOX, 1); - tc->setIntReg(INTREG_LR, curPc + - (saved_cpsr.t ? thumbPcOffset() : armPcOffset())); - switch (nextMode()) { + // Clear the exclusive monitor + tc->setMiscReg(MISCREG_LOCKFLAG, 0); + + if (cpsr.mode == MODE_HYP) { + tc->setMiscReg(MISCREG_ELR_HYP, curPc + + (saved_cpsr.t ?
thumbPcOffset(true) : armPcOffset(true))); + } else { + tc->setIntReg(INTREG_LR, curPc + + (saved_cpsr.t ? thumbPcOffset(false) : armPcOffset(false))); + } + + switch (cpsr.mode) { case MODE_FIQ: tc->setMiscReg(MISCREG_SPSR_FIQ, saved_cpsr); break; @@ -143,12 +552,23 @@ ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) case MODE_SVC: tc->setMiscReg(MISCREG_SPSR_SVC, saved_cpsr); break; - case MODE_UNDEFINED: - tc->setMiscReg(MISCREG_SPSR_UND, saved_cpsr); + case MODE_MON: + assert(have_security); + tc->setMiscReg(MISCREG_SPSR_MON, saved_cpsr); break; case MODE_ABORT: tc->setMiscReg(MISCREG_SPSR_ABT, saved_cpsr); break; + case MODE_UNDEFINED: + tc->setMiscReg(MISCREG_SPSR_UND, saved_cpsr); + if (ec(tc) != EC_UNKNOWN) + setSyndrome(tc, MISCREG_HSR); + break; + case MODE_HYP: + assert(have_virtualization); + tc->setMiscReg(MISCREG_SPSR_HYP, saved_cpsr); + setSyndrome(tc, MISCREG_HSR); + break; default: panic("unknown Mode\n"); } @@ -161,9 +581,102 @@ ArmFault::invoke(ThreadContext *tc, StaticInstPtr inst) pc.nextThumb(pc.thumb()); pc.jazelle(cpsr.j); pc.nextJazelle(pc.jazelle()); + pc.aarch64(!cpsr.width); + pc.nextAArch64(!cpsr.width); tc->pcState(pc); } +void +ArmFault::invoke64(ThreadContext *tc, StaticInstPtr inst) +{ + // Determine actual misc. register indices for ELR_ELx and SPSR_ELx + MiscRegIndex elr_idx, spsr_idx; + switch (toEL) { + case EL1: + elr_idx = MISCREG_ELR_EL1; + spsr_idx = MISCREG_SPSR_EL1; + break; + // @todo: uncomment this to enable Virtualization + // case EL2: + // assert(ArmSystem::haveVirtualization()); + // elr_idx = MISCREG_ELR_EL2; + // spsr_idx = MISCREG_SPSR_EL2; + // break; + case EL3: + assert(ArmSystem::haveSecurity(tc)); + elr_idx = MISCREG_ELR_EL3; + spsr_idx = MISCREG_SPSR_EL3; + break; + default: + panic("Invalid target exception level"); + break; + } + + // Save process state into SPSR_ELx + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + CPSR spsr = cpsr; + spsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); + spsr.c = tc->readIntReg(INTREG_CONDCODES_C); + spsr.v = tc->readIntReg(INTREG_CONDCODES_V); + if (from64) { + // Force some bitfields to 0 + spsr.q = 0; + spsr.it1 = 0; + spsr.j = 0; + spsr.res0_23_22 = 0; + spsr.ge = 0; + spsr.it2 = 0; + spsr.t = 0; + } else { + spsr.ge = tc->readIntReg(INTREG_CONDCODES_GE); + ITSTATE it = tc->pcState().itstate(); + spsr.it2 = it.top6; + spsr.it1 = it.bottom2; + // Force some bitfields to 0 + spsr.res0_23_22 = 0; + spsr.ss = 0; + } + tc->setMiscReg(spsr_idx, spsr); + + // Save preferred return address into ELR_ELx + Addr curr_pc = tc->pcState().pc(); + Addr ret_addr = curr_pc; + if (from64) + ret_addr += armPcElrOffset(); + else + ret_addr += spsr.t ? thumbPcElrOffset() : armPcElrOffset(); + tc->setMiscReg(elr_idx, ret_addr); + + // Update process state + OperatingMode64 mode = 0; + mode.spX = 1; + mode.el = toEL; + mode.width = 0; + cpsr.mode = mode; + cpsr.daif = 0xf; + cpsr.il = 0; + cpsr.ss = 0; + tc->setMiscReg(MISCREG_CPSR, cpsr); + + // Set PC to start of exception handler + Addr new_pc = purifyTaggedAddr(getVector64(tc), tc, toEL); + DPRINTF(Faults, "Invoking Fault (AArch64 target EL):%s cpsr:%#x PC:%#x " + "elr:%#x newVec: %#x\n", name(), cpsr, curr_pc, ret_addr, new_pc); + PCState pc(new_pc); + pc.aarch64(!cpsr.width); + pc.nextAArch64(!cpsr.width); + tc->pcState(pc); + + // If we have a valid instruction then use it to annotate this fault with + // extra information.
This is used to generate the correct fault syndrome + // information + if (inst) + reinterpret_cast<ArmStaticInst *>(inst.get())->annotateFault(this); + // Save exception syndrome + if ((nextMode() != MODE_IRQ) && (nextMode() != MODE_FIQ)) + setSyndrome(tc, getSyndromeReg64()); +} + void Reset::invoke(ThreadContext *tc, StaticInstPtr inst) { @@ -171,7 +684,25 @@ Reset::invoke(ThreadContext *tc, StaticInstPtr inst) tc->getCpuPtr()->clearInterrupts(); tc->clearArchRegs(); } - ArmFault::invoke(tc, inst); + if (!ArmSystem::highestELIs64(tc)) { + ArmFault::invoke(tc, inst); + tc->setMiscReg(MISCREG_VMPIDR, + getMPIDR(dynamic_cast<ArmSystem*>(tc->getSystemPtr()), tc)); + + // Unless we have SMC code to get us there, boot in HYP! + if (ArmSystem::haveVirtualization(tc) && + !ArmSystem::haveSecurity(tc)) { + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + cpsr.mode = MODE_HYP; + tc->setMiscReg(MISCREG_CPSR, cpsr); + } + } else { + // Advance the PC to the IMPLEMENTATION DEFINED reset value + PCState pc = ArmSystem::resetAddr64(tc); + pc.aarch64(true); + pc.nextAArch64(true); + tc->pcState(pc); + } } void @@ -196,6 +727,45 @@ UndefinedInstruction::invoke(ThreadContext *tc, StaticInstPtr inst) } } +bool +UndefinedInstruction::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // if HCR.TGE is set to 1, take to Hyp mode through Hyp Trap vector + toHyp |= !inSecureState(scr, cpsr) && hcr.tge && (cpsr.mode == MODE_USER); + return toHyp; +} + +uint32_t +UndefinedInstruction::iss() const +{ + if (overrideEc == EC_INVALID) + return issRaw; + + uint32_t new_iss = 0; + uint32_t op0, op1, op2, CRn, CRm, Rt, dir; + + dir = bits(machInst, 21, 21); + op0 = bits(machInst, 20, 19); + op1 = bits(machInst, 18, 16); + CRn = bits(machInst, 15, 12); + CRm = bits(machInst, 11, 8); + op2 = bits(machInst, 7, 5); + Rt = bits(machInst, 4, 0); + + new_iss = op0 << 20 | op2 << 17 | op1 << 14 | CRn << 10 | + Rt << 5 | CRm << 1 | dir; + + return new_iss; +} + void SupervisorCall::invoke(ThreadContext *tc, StaticInstPtr inst) { @@ -207,7 +777,12 @@ SupervisorCall::invoke(ThreadContext *tc, StaticInstPtr inst) // As of now, there isn't a 32 bit thumb version of this instruction. assert(!machInst.bigThumb); uint32_t callNum; - callNum = tc->readIntReg(INTREG_R7); + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + OperatingMode mode = (OperatingMode)(uint8_t)cpsr.mode; + if (opModeIs64(mode)) + callNum = tc->readIntReg(INTREG_X8); + else + callNum = tc->readIntReg(INTREG_R7); tc->syscall(callNum); // Advance the PC since that won't happen automatically. @@ -217,21 +792,593 @@ SupervisorCall::invoke(ThreadContext *tc, StaticInstPtr inst) tc->pcState(pc); } +bool +SupervisorCall::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // if HCR.TGE is set to 1, take to Hyp mode through Hyp Trap vector + toHyp |= !inSecureState(scr, cpsr) && hcr.tge && (cpsr.mode == MODE_USER); + return toHyp; +}
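Aside: in invoke64() above, the preferred return address saved to ELR_ELx is the faulting PC plus a per-fault offset from the extended FaultVals table (its ARM_ELR/Thumb_ELR columns): a supervisor call records the following instruction, while aborts record the faulting one. A hedged sketch of that selection logic (the offset values mirror the "Supervisor Call" and abort rows above; the helper itself is illustrative, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    // Per-fault ELR offsets, as in the FaultVals columns {ARM_ELR, Thumb_ELR}.
    struct ElrOffsets { unsigned arm; unsigned thumb; };
    const ElrOffsets svc  = { 4, 2 };  // "Supervisor Call" row
    const ElrOffsets abrt = { 0, 0 };  // "Prefetch Abort" / "Data Abort" rows

    // Preferred return address for an exception taken to AArch64.
    uint64_t preferredReturn(uint64_t pc, bool from64, bool thumb,
                             const ElrOffsets &o)
    {
        // Coming from AArch64 there is no Thumb state: use the ARM column.
        return pc + ((from64 || !thumb) ? o.arm : o.thumb);
    }

    int main()
    {
        // An SVC in AArch32 Thumb state at 0x8000 returns to 0x8002; a data
        // abort there would return to the faulting instruction itself.
        std::printf("%#llx %#llx\n",
                    (unsigned long long) preferredReturn(0x8000, false, true, svc),
                    (unsigned long long) preferredReturn(0x8000, false, true, abrt));
        return 0;
    }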
+ExceptionClass +SupervisorCall::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : + (from64 ? EC_SVC_64 : vals.ec); +} + +uint32_t +SupervisorCall::iss() const +{ + // Even if we have a 24 bit imm from an arm32 instruction, we only use + // the bottom 16 bits for the ISS value (it doesn't hurt for AArch64 SVC). + return issRaw & 0xFFFF; +} + +uint32_t +SecureMonitorCall::iss() const +{ + if (from64) + return bits(machInst, 20, 5); + return 0; +} + +ExceptionClass +UndefinedInstruction::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : vals.ec; +} + + +HypervisorCall::HypervisorCall(ExtMachInst _machInst, uint32_t _imm) : + ArmFaultVals<HypervisorCall>(_machInst, _imm) +{} + +ExceptionClass +HypervisorTrap::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : vals.ec; +} + +template<class T> +FaultOffset +ArmFaultVals<T>::offset(ThreadContext *tc) +{ + bool isHypTrap = false; + + // Normally we just use the exception vector from the table at the top of + // this file. However, if this exception has caused a transition to hyp + // mode, and it's an exception type that would only do this if it has been + // trapped, then we use the hyp trap vector instead of the normal vector + if (vals.hypTrappable) { + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + if (cpsr.mode == MODE_HYP) { + CPSR spsr = tc->readMiscReg(MISCREG_SPSR_HYP); + isHypTrap = spsr.mode != MODE_HYP; + } + } + return isHypTrap ? 0x14 : vals.offset; +} + +// void +// SupervisorCall::setSyndrome64(ThreadContext *tc, MiscRegIndex esr_idx) +// { +// ESR esr = 0; +// esr.ec = machInst.aarch64 ? SvcAArch64 : SvcAArch32; +// esr.il = !machInst.thumb; +// if (machInst.aarch64) +// esr.imm16 = bits(machInst.instBits, 20, 5); +// else if (machInst.thumb) +// esr.imm16 = bits(machInst.instBits, 7, 0); +// else +// esr.imm16 = bits(machInst.instBits, 15, 0); +// tc->setMiscReg(esr_idx, esr); +// } + +void +SecureMonitorCall::invoke(ThreadContext *tc, StaticInstPtr inst) +{ + if (FullSystem) { + ArmFault::invoke(tc, inst); + return; + } +} + +ExceptionClass +SecureMonitorCall::ec(ThreadContext *tc) const +{ + return (from64 ? EC_SMC_64 : vals.ec); +} + +ExceptionClass +SupervisorTrap::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : vals.ec; +} + +ExceptionClass +SecureMonitorTrap::ec(ThreadContext *tc) const +{ + return (overrideEc != EC_INVALID) ? overrideEc : + (from64 ? EC_SMC_64 : vals.ec); +}
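Aside: ArmFaultVals<T>::offset() above encodes one subtle rule: a hyp-trappable exception that arrived in Hyp mode from some other mode uses the single Hyp Trap vector at offset 0x14 rather than its normal table offset. Condensed into a standalone sketch (mode values and the helper name are illustrative):

    #include <cstdint>
    #include <cstdio>

    enum Mode { MODE_SVC, MODE_HYP };

    // Mirrors ArmFaultVals<T>::offset(): a trappable fault taken while in
    // Hyp mode, whose saved (SPSR) mode is not Hyp, vectors through 0x14.
    uint32_t vectorOffset(bool hypTrappable, Mode cur, Mode spsrMode,
                          uint32_t tableOffset)
    {
        bool isHypTrap = hypTrappable && cur == MODE_HYP && spsrMode != MODE_HYP;
        return isHypTrap ? 0x14 : tableOffset;
    }

    int main()
    {
        // An undefined-instruction fault (table offset 0x04) trapped from
        // SVC into Hyp mode uses the Hyp Trap vector instead.
        std::printf("%#x\n", vectorOffset(true, MODE_HYP, MODE_SVC, 0x04));
        return 0;
    }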
+ DPRINTF(Faults, "Warning: Incomplete translation method " + "override detected.\n"); + } + if (override_LPAE) + tranMethod = ArmFault::LpaeTran; + } + } + + if (source == ArmFault::AsynchronousExternalAbort) { + tc->getCpuPtr()->clearInterrupt(INT_ABT, 0); + } + // Get effective fault source encoding + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + FSR fsr = getFsr(tc); + + // source must be determined BEFORE invoking generic routines which will + // try to set hsr etc. and are based upon source! + ArmFaultVals::invoke(tc, inst); + + if (cpsr.width) { // AArch32 + if (cpsr.mode == MODE_HYP) { + tc->setMiscReg(T::HFarIndex, faultAddr); + } else if (stage2) { + tc->setMiscReg(MISCREG_HPFAR, (faultAddr >> 8) & ~0xf); + tc->setMiscReg(T::HFarIndex, OVAddr); + } else { + tc->setMiscReg(T::FsrIndex, fsr); + tc->setMiscReg(T::FarIndex, faultAddr); + } + DPRINTF(Faults, "Abort Fault source=%#x fsr=%#x faultAddr=%#x "\ + "tranMethod=%#x\n", source, fsr, faultAddr, tranMethod); + } else { // AArch64 + // Set the FAR register. Nothing else to do if we are in AArch64 state + // because the syndrome register has already been set inside invoke64() + tc->setMiscReg(AbortFault::getFaultAddrReg64(), faultAddr); + } +} + +template +FSR +AbortFault::getFsr(ThreadContext *tc) +{ + FSR fsr = 0; + + if (((CPSR) tc->readMiscRegNoEffect(MISCREG_CPSR)).width) { + // AArch32 + assert(tranMethod != ArmFault::UnknownTran); + if (tranMethod == ArmFault::LpaeTran) { + srcEncoded = ArmFault::longDescFaultSources[source]; + fsr.status = srcEncoded; + fsr.lpae = 1; + } else { + srcEncoded = ArmFault::shortDescFaultSources[source]; + fsr.fsLow = bits(srcEncoded, 3, 0); + fsr.fsHigh = bits(srcEncoded, 4); + fsr.domain = static_cast(domain); + } + fsr.wnr = (write ? 1 : 0); + fsr.ext = 0; + } else { + // AArch64 + srcEncoded = ArmFault::aarch64FaultSources[source]; + } + if (srcEncoded == ArmFault::FaultSourceInvalid) { + panic("Invalid fault source\n"); + } + return fsr; +} + +template +bool +AbortFault::abortDisable(ThreadContext *tc) +{ + if (ArmSystem::haveSecurity(tc)) { + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + return (!scr.ns || scr.aw); + } + return true; +} + +template +void +AbortFault::annotate(ArmFault::AnnotationIDs id, uint64_t val) +{ + switch (id) + { + case ArmFault::S1PTW: + s1ptw = val; + break; + case ArmFault::OVA: + OVAddr = val; + break; + + // Just ignore unknown ID's + default: + break; + } +} + +template +uint32_t +AbortFault::iss() const +{ + uint32_t val; + + val = srcEncoded & 0x3F; + val |= write << 6; + val |= s1ptw << 7; + return (val); +} + +template +bool +AbortFault::isMMUFault() const +{ + // NOTE: Not relying on LL information being aligned to lowest bits here + return + (source == ArmFault::AlignmentFault) || + ((source >= ArmFault::TranslationLL) && + (source < ArmFault::TranslationLL + 4)) || + ((source >= ArmFault::AccessFlagLL) && + (source < ArmFault::AccessFlagLL + 4)) || + ((source >= ArmFault::DomainLL) && + (source < ArmFault::DomainLL + 4)) || + ((source >= ArmFault::PermissionLL) && + (source < ArmFault::PermissionLL + 4)); +} + +ExceptionClass +PrefetchAbort::ec(ThreadContext *tc) const +{ + if (to64) { + // AArch64 + if (toEL == fromEL) + return EC_PREFETCH_ABORT_CURR_EL; + else + return EC_PREFETCH_ABORT_LOWER_EL; + } else { + // AArch32 + // Abort faults have different EC codes depending on whether + // the fault originated within HYP mode, or not. So override + // the method and add the extra adjustment of the EC value. 
+ + ExceptionClass ec = ArmFaultVals::vals.ec; + + CPSR spsr = tc->readMiscReg(MISCREG_SPSR_HYP); + if (spsr.mode == MODE_HYP) { + ec = ((ExceptionClass) (((uint32_t) ec) + 1)); + } + return ec; + } +} + +bool +PrefetchAbort::routeToMonitor(ThreadContext *tc) const +{ + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + + return scr.ea && !isMMUFault(); +} + +bool +PrefetchAbort::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + HDCR hdcr = tc->readMiscRegNoEffect(MISCREG_HDCR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // otherwise, check whether to take to Hyp mode through Hyp Trap vector + toHyp |= (stage2 || + ( (source == DebugEvent) && hdcr.tde && (cpsr.mode != MODE_HYP)) || + ( (source == SynchronousExternalAbort) && hcr.tge && (cpsr.mode == MODE_USER)) + ) && !inSecureState(scr, cpsr); + return toHyp; +} + +ExceptionClass +DataAbort::ec(ThreadContext *tc) const +{ + if (to64) { + // AArch64 + if (source == ArmFault::AsynchronousExternalAbort) { + panic("Asynchronous External Abort should be handled with \ + SystemErrors (SErrors)!"); + } + if (toEL == fromEL) + return EC_DATA_ABORT_CURR_EL; + else + return EC_DATA_ABORT_LOWER_EL; + } else { + // AArch32 + // Abort faults have different EC codes depending on whether + // the fault originated within HYP mode, or not. So override + // the method and add the extra adjustment of the EC value. + + ExceptionClass ec = ArmFaultVals::vals.ec; + + CPSR spsr = tc->readMiscReg(MISCREG_SPSR_HYP); + if (spsr.mode == MODE_HYP) { + ec = ((ExceptionClass) (((uint32_t) ec) + 1)); + } + return ec; + } +} + +bool +DataAbort::routeToMonitor(ThreadContext *tc) const +{ + SCR scr = 0; + if (from64) + scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3); + else + scr = tc->readMiscRegNoEffect(MISCREG_SCR); + + return scr.ea && !isMMUFault(); +} + +bool +DataAbort::routeToHyp(ThreadContext *tc) const +{ + bool toHyp; + + SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR); + HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR); + CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR); + HDCR hdcr = tc->readMiscRegNoEffect(MISCREG_HDCR); + + // if in Hyp mode then stay in Hyp mode + toHyp = scr.ns && (cpsr.mode == MODE_HYP); + // otherwise, check whether to take to Hyp mode through Hyp Trap vector + toHyp |= (stage2 || + ( (cpsr.mode != MODE_HYP) && ( ((source == AsynchronousExternalAbort) && hcr.amo) || + ((source == DebugEvent) && hdcr.tde) ) + ) || + ( (cpsr.mode == MODE_USER) && hcr.tge && + ((source == AlignmentFault) || + (source == SynchronousExternalAbort)) + ) + ) && !inSecureState(scr, cpsr); + return toHyp; +} + +uint32_t +DataAbort::iss() const +{ + uint32_t val; + + // Add on the data abort specific fields to the generic abort ISS value + val = AbortFault::iss(); + // ISS is valid if not caused by a stage 1 page table walk, and when taken + // to AArch64 only when directed to EL2 + if (!s1ptw && (!to64 || toEL == EL2)) { + val |= isv << 24; + if (isv) { + val |= sas << 22; + val |= sse << 21; + val |= srt << 16; + // AArch64 only. 
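Aside: DataAbort::iss() below layers the data-abort-specific syndrome fields on top of the generic abort ISS (fault status in bits [5:0], WnR in [6], S1PTW in [7]): ISV in bit 24 and, when the syndrome is valid, SAS in [23:22], SSE in [21], SRT in [20:16], SF in [15] and AR in [14]. A small self-contained illustration of that layout (field values are invented, and the validity gating on s1ptw/toEL described below is omitted):

    #include <cstdint>
    #include <cstdio>

    // Pack a data-abort ISS the way AbortFault<T>::iss() plus
    // DataAbort::iss() do.
    uint32_t dataAbortIss(uint8_t fsc, bool write, bool s1ptw,
                          bool isv, unsigned sas, bool sse, unsigned srt)
    {
        uint32_t val = fsc & 0x3f;            // fault status code
        val |= (uint32_t) write << 6;         // WnR: write-not-read
        val |= (uint32_t) s1ptw << 7;         // stage 1 table-walk flag
        if (isv) {                            // access-info fields are valid
            val |= 1u << 24;                  // ISV
            val |= (sas & 0x3) << 22;         // access size
            val |= (uint32_t) sse << 21;      // sign extension
            val |= (srt & 0x1f) << 16;        // register transferred
        }
        return val;
    }

    int main()
    {
        // A 32-bit store to register 3 hitting a level-3 translation fault.
        std::printf("ISS = %#x\n",
                    dataAbortIss(0x07, true, false, true, 2, false, 3));
        return 0;
    }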
+void
+DataAbort::annotate(AnnotationIDs id, uint64_t val)
+{
+    AbortFault<DataAbort>::annotate(id, val);
+    switch (id)
+    {
+      case SAS:
+        isv = true;
+        sas = val;
+        break;
+      case SSE:
+        isv = true;
+        sse = val;
+        break;
+      case SRT:
+        isv = true;
+        srt = val;
+        break;
+      case SF:
+        isv = true;
+        sf = val;
+        break;
+      case AR:
+        isv = true;
+        ar = val;
+        break;
+      // Just ignore unknown IDs
+      default:
+        break;
+    }
+}
+
+void
+VirtualDataAbort::invoke(ThreadContext *tc, StaticInstPtr inst)
+{
+    AbortFault<VirtualDataAbort>::invoke(tc, inst);
+    HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR);
+    hcr.va = 0;
+    tc->setMiscRegNoEffect(MISCREG_HCR, hcr);
+}
+
+bool
+Interrupt::routeToMonitor(ThreadContext *tc) const
+{
+    assert(ArmSystem::haveSecurity(tc));
+    SCR scr = 0;
+    if (from64)
+        scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3);
+    else
+        scr = tc->readMiscRegNoEffect(MISCREG_SCR);
+    return scr.irq;
+}
+
+bool
+Interrupt::routeToHyp(ThreadContext *tc) const
+{
+    bool toHyp;
+
+    SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR);
+    HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR);
+    CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR);
+    // Determine whether IRQs are routed to Hyp mode.
+    toHyp = (!scr.irq && hcr.imo && !inSecureState(scr, cpsr)) ||
+            (cpsr.mode == MODE_HYP);
+    return toHyp;
+}
+
+bool
+Interrupt::abortDisable(ThreadContext *tc)
+{
+    if (ArmSystem::haveSecurity(tc)) {
+        SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR);
+        return (!scr.ns || scr.aw);
+    }
+    return true;
+}
+
+VirtualInterrupt::VirtualInterrupt()
+{}
+
+bool
+FastInterrupt::routeToMonitor(ThreadContext *tc) const
+{
+    assert(ArmSystem::haveSecurity(tc));
+    SCR scr = 0;
+    if (from64)
+        scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3);
+    else
+        scr = tc->readMiscRegNoEffect(MISCREG_SCR);
+    return scr.fiq;
+}
+
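Interrupt::routeToHyp() above and FastInterrupt::routeToHyp() below apply the same routing rule, differing only in which SCR/HCR bits they consult (IRQ/IMO versus FIQ/FMO). The predicate reduces to a three-input check; a minimal sketch (routePhysIntToHyp is an illustrative name, not gem5 API):

    // Illustrative only: route a physical interrupt to Hyp mode if the core
    // is already in Hyp, or if the interrupt is not claimed by Monitor
    // (SCR bit clear), virtualization routing is enabled (HCR bit set),
    // and the core is in Non-secure state.
    bool
    routePhysIntToHyp(bool scrBit, bool hcrBit, bool secure, bool inHyp)
    {
        return inHyp || (!scrBit && hcrBit && !secure);
    }
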
+bool
+FastInterrupt::routeToHyp(ThreadContext *tc) const
+{
+    bool toHyp;
+
+    SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR);
+    HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR);
+    CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR);
+    // Determine whether FIQs are routed to Hyp mode.
+    toHyp = (!scr.fiq && hcr.fmo && !inSecureState(scr, cpsr)) ||
+            (cpsr.mode == MODE_HYP);
+    return toHyp;
+}
+
+bool
+FastInterrupt::abortDisable(ThreadContext *tc)
+{
+    if (ArmSystem::haveSecurity(tc)) {
+        SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR);
+        return (!scr.ns || scr.aw);
+    }
+    return true;
+}
+
+bool
+FastInterrupt::fiqDisable(ThreadContext *tc)
+{
+    if (ArmSystem::haveVirtualization(tc)) {
+        return true;
+    } else if (ArmSystem::haveSecurity(tc)) {
+        SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR);
+        return (!scr.ns || scr.fw);
+    }
+    return true;
+}
+
+VirtualFastInterrupt::VirtualFastInterrupt()
+{}
+
+void
+PCAlignmentFault::invoke(ThreadContext *tc, StaticInstPtr inst)
+{
+    ArmFaultVals<PCAlignmentFault>::invoke(tc, inst);
+    assert(from64);
+    // Set the FAR
+    tc->setMiscReg(getFaultAddrReg64(), faultPC);
+}
+
+SPAlignmentFault::SPAlignmentFault()
+{}
+
+SystemError::SystemError()
+{}
+
+void
+SystemError::invoke(ThreadContext *tc, StaticInstPtr inst)
+{
+    tc->getCpuPtr()->clearInterrupt(INT_ABT, 0);
+    ArmFault::invoke(tc, inst);
+}
+
+bool
+SystemError::routeToMonitor(ThreadContext *tc) const
+{
+    assert(ArmSystem::haveSecurity(tc));
+    assert(from64);
+    SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3);
+    return scr.ea;
+}
+
+bool
+SystemError::routeToHyp(ThreadContext *tc) const
+{
+    bool toHyp;
+    assert(from64);
+
+    SCR scr = tc->readMiscRegNoEffect(MISCREG_SCR_EL3);
+    HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR);
+    CPSR cpsr = tc->readMiscRegNoEffect(MISCREG_CPSR);
+
+    toHyp = (!scr.ea && hcr.amo && !inSecureState(scr, cpsr)) ||
+            (!scr.ea && !scr.rw && !hcr.amo && !inSecureState(scr, cpsr));
+    return toHyp;
+}
 
 void
@@ -247,11 +1394,6 @@ FlushPipe::invoke(ThreadContext *tc, StaticInstPtr inst) {
     tc->pcState(pc);
 }
 
-template void AbortFault<PrefetchAbort>::invoke(ThreadContext *tc,
-                                                StaticInstPtr inst);
-template void AbortFault<DataAbort>::invoke(ThreadContext *tc,
-                                            StaticInstPtr inst);
-
 void
 ArmSev::invoke(ThreadContext *tc, StaticInstPtr inst) {
     DPRINTF(Faults, "Invoking ArmSev Fault\n");
@@ -265,6 +1407,34 @@ ArmSev::invoke(ThreadContext *tc, StaticInstPtr inst) {
     tc->getCpuPtr()->clearInterrupt(INT_SEV, 0);
 }
 
-// return via SUBS pc, lr, xxx; rfe, movs, ldm
+// Instantiate all the templates to make the linker happy
+template class ArmFaultVals<Reset>;
+template class ArmFaultVals<UndefinedInstruction>;
+template class ArmFaultVals<SupervisorCall>;
+template class ArmFaultVals<SecureMonitorCall>;
+template class ArmFaultVals<HypervisorCall>;
+template class ArmFaultVals<PrefetchAbort>;
+template class ArmFaultVals<DataAbort>;
+template class ArmFaultVals<VirtualDataAbort>;
+template class ArmFaultVals<HypervisorTrap>;
+template class ArmFaultVals<Interrupt>;
+template class ArmFaultVals<VirtualInterrupt>;
+template class ArmFaultVals<FastInterrupt>;
+template class ArmFaultVals<VirtualFastInterrupt>;
+template class ArmFaultVals<SupervisorTrap>;
+template class ArmFaultVals<SecureMonitorTrap>;
+template class ArmFaultVals<PCAlignmentFault>;
+template class ArmFaultVals<SPAlignmentFault>;
+template class ArmFaultVals<SystemError>;
+template class ArmFaultVals<FlushPipe>;
+template class ArmFaultVals<ArmSev>;
+template class AbortFault<PrefetchAbort>;
+template class AbortFault<DataAbort>;
+template class AbortFault<VirtualDataAbort>;
+
+
+IllegalInstSetStateFault::IllegalInstSetStateFault()
+{}
+
 } // namespace ArmISA
diff --git a/src/arch/arm/faults.hh b/src/arch/arm/faults.hh
index 9858e52ef..a5720f115 100644
--- a/src/arch/arm/faults.hh
+++ b/src/arch/arm/faults.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010, 2012-2013 ARM Limited
  * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
@@ -40,12 +40,15 @@
 *
 * Authors: Ali Saidi
 *          Gabe Black
+ *          Giacomo Gabrielli
+ *          Thomas Grocutt
 */
 
 #ifndef __ARM_FAULTS_HH__
 #define __ARM_FAULTS_HH__
 
 #include "arch/arm/miscregs.hh"
+#include
"arch/arm/pagetable.hh" #include "arch/arm/types.hh" #include "base/misc.hh" #include "sim/faults.hh" @@ -60,63 +63,146 @@ typedef const Addr FaultOffset; class ArmFault : public FaultBase { protected: + ExtMachInst machInst; + uint32_t issRaw; + + // Helper variables for ARMv8 exception handling + bool from64; // True if the exception is generated from the AArch64 state + bool to64; // True if the exception is taken in AArch64 state + ExceptionLevel fromEL; // Source exception level + ExceptionLevel toEL; // Target exception level + OperatingMode fromMode; // Source operating mode + Addr getVector(ThreadContext *tc); + Addr getVector64(ThreadContext *tc); public: - enum StatusEncoding + /// Generic fault source enums used to index into + /// {short/long/aarch64}DescFaultSources[] to get the actual encodings based + /// on the current register width state and the translation table format in + /// use + enum FaultSource { - // Fault Status register encodings - // ARM ARM B3.9.4 - AlignmentFault = 0x1, - DebugEvent = 0x2, - AccessFlag0 = 0x3, - InstructionCacheMaintenance = 0x4, - Translation0 = 0x5, - AccessFlag1 = 0x6, - Translation1 = 0x7, - SynchronousExternalAbort0 = 0x8, - Domain0 = 0x9, - SynchronousExternalAbort1 = 0x8, - Domain1 = 0xb, - TranslationTableWalkExtAbt0 = 0xc, - Permission0 = 0xd, - TranslationTableWalkExtAbt1 = 0xe, - Permission1 = 0xf, - AsynchronousExternalAbort = 0x16, - MemoryAccessAsynchronousParityError = 0x18, - MemoryAccessSynchronousParityError = 0x19, - TranslationTableWalkPrtyErr0 = 0x1c, - TranslationTableWalkPrtyErr1 = 0x1e, + AlignmentFault = 0, + InstructionCacheMaintenance, // Short-desc. format only + SynchExtAbtOnTranslTableWalkLL, + SynchPtyErrOnTranslTableWalkLL = SynchExtAbtOnTranslTableWalkLL + 4, + TranslationLL = SynchPtyErrOnTranslTableWalkLL + 4, + AccessFlagLL = TranslationLL + 4, + DomainLL = AccessFlagLL + 4, + PermissionLL = DomainLL + 4, + DebugEvent = PermissionLL + 4, + SynchronousExternalAbort, + TLBConflictAbort, // Requires LPAE + SynchPtyErrOnMemoryAccess, + AsynchronousExternalAbort, + AsynchPtyErrOnMemoryAccess, + AddressSizeLL, // AArch64 only - // not a real fault. This is a status code - // to allow the translation function to inform - // the memory access function not to proceed - // for a Prefetch that misses in the TLB. - PrefetchTLBMiss = 0x1f, - PrefetchUncacheable = 0x20 + // Not real faults. These are faults to allow the translation function + // to inform the memory access function not to proceed for a prefetch + // that misses in the TLB or that targets an uncacheable address + PrefetchTLBMiss = AddressSizeLL + 4, + PrefetchUncacheable, + + NumFaultSources, + FaultSourceInvalid = 0xff + }; + + /// Encodings of the fault sources when the short-desc. translation table + /// format is in use (ARM ARM Issue C B3.13.3) + static uint8_t shortDescFaultSources[NumFaultSources]; + /// Encodings of the fault sources when the long-desc. 
translation table + /// format is in use (ARM ARM Issue C B3.13.3) + static uint8_t longDescFaultSources[NumFaultSources]; + /// Encodings of the fault sources in AArch64 state + static uint8_t aarch64FaultSources[NumFaultSources]; + + enum AnnotationIDs + { + S1PTW, // DataAbort, PrefetchAbort: Stage 1 Page Table Walk, + OVA, // DataAbort, PrefetchAbort: stage 1 Virtual Address for stage 2 faults + SAS, // DataAbort: Syndrome Access Size + SSE, // DataAbort: Syndrome Sign Extend + SRT, // DataAbort: Syndrome Register Transfer + + // AArch64 only + SF, // DataAbort: width of the accessed register is SixtyFour + AR // DataAbort: Acquire/Release semantics + }; + + enum TranMethod + { + LpaeTran, + VmsaTran, + UnknownTran }; struct FaultVals { const FaultName name; + const FaultOffset offset; + + // Offsets used for exceptions taken in AArch64 state + const uint16_t currELTOffset; + const uint16_t currELHOffset; + const uint16_t lowerEL64Offset; + const uint16_t lowerEL32Offset; + const OperatingMode nextMode; + const uint8_t armPcOffset; const uint8_t thumbPcOffset; + // The following two values are used in place of armPcOffset and + // thumbPcOffset when the exception return address is saved into ELR + // registers (exceptions taken in HYP mode or in AArch64 state) + const uint8_t armPcElrOffset; + const uint8_t thumbPcElrOffset; + + const bool hypTrappable; const bool abortDisable; const bool fiqDisable; + + // Exception class used to appropriately set the syndrome register + // (exceptions taken in HYP mode or in AArch64 state) + const ExceptionClass ec; + FaultStat count; }; + ArmFault(ExtMachInst _machInst = 0, uint32_t _iss = 0) : + machInst(_machInst), issRaw(_iss), from64(false), to64(false) {} + + // Returns the actual syndrome register to use based on the target + // exception level + MiscRegIndex getSyndromeReg64() const; + // Returns the actual fault address register to use based on the target + // exception level + MiscRegIndex getFaultAddrReg64() const; + void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + void invoke64(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + virtual void annotate(AnnotationIDs id, uint64_t val) {} virtual FaultStat& countStat() = 0; - virtual FaultOffset offset() = 0; + virtual FaultOffset offset(ThreadContext *tc) = 0; + virtual FaultOffset offset64() = 0; virtual OperatingMode nextMode() = 0; - virtual uint8_t armPcOffset() = 0; - virtual uint8_t thumbPcOffset() = 0; - virtual bool abortDisable() = 0; - virtual bool fiqDisable() = 0; + virtual bool routeToMonitor(ThreadContext *tc) const = 0; + virtual bool routeToHyp(ThreadContext *tc) const { return false; } + virtual uint8_t armPcOffset(bool isHyp) = 0; + virtual uint8_t thumbPcOffset(bool isHyp) = 0; + virtual uint8_t armPcElrOffset() = 0; + virtual uint8_t thumbPcElrOffset() = 0; + virtual bool abortDisable(ThreadContext *tc) = 0; + virtual bool fiqDisable(ThreadContext *tc) = 0; + virtual ExceptionClass ec(ThreadContext *tc) const = 0; + virtual uint32_t iss() const = 0; + virtual bool isStage2() const { return false; } + virtual FSR getFsr(ThreadContext *tc) { return 0; } + virtual void setSyndrome(ThreadContext *tc, MiscRegIndex syndrome_reg); }; template @@ -126,14 +212,38 @@ class ArmFaultVals : public ArmFault static FaultVals vals; public: + ArmFaultVals(ExtMachInst _machInst = 0, uint32_t _iss = 0) : + ArmFault(_machInst, _iss) {} FaultName name() const { return vals.name; } - FaultStat & countStat() {return vals.count;} - 
FaultOffset offset() { return vals.offset; } + FaultStat & countStat() { return vals.count; } + FaultOffset offset(ThreadContext *tc); + + FaultOffset + offset64() + { + if (toEL == fromEL) { + if (opModeIsT(fromMode)) + return vals.currELTOffset; + return vals.currELHOffset; + } else { + if (from64) + return vals.lowerEL64Offset; + return vals.lowerEL32Offset; + } + } + OperatingMode nextMode() { return vals.nextMode; } - uint8_t armPcOffset() { return vals.armPcOffset; } - uint8_t thumbPcOffset() { return vals.thumbPcOffset; } - bool abortDisable() { return vals.abortDisable; } - bool fiqDisable() { return vals.fiqDisable; } + virtual bool routeToMonitor(ThreadContext *tc) const { return false; } + uint8_t armPcOffset(bool isHyp) { return isHyp ? vals.armPcElrOffset + : vals.armPcOffset; } + uint8_t thumbPcOffset(bool isHyp) { return isHyp ? vals.thumbPcElrOffset + : vals.thumbPcOffset; } + uint8_t armPcElrOffset() { return vals.armPcElrOffset; } + uint8_t thumbPcElrOffset() { return vals.thumbPcElrOffset; } + virtual bool abortDisable(ThreadContext* tc) { return vals.abortDisable; } + virtual bool fiqDisable(ThreadContext* tc) { return vals.fiqDisable; } + virtual ExceptionClass ec(ThreadContext *tc) const { return vals.ec; } + virtual uint32_t iss() const { return issRaw; } }; class Reset : public ArmFaultVals @@ -146,87 +256,283 @@ class Reset : public ArmFaultVals class UndefinedInstruction : public ArmFaultVals { protected: - ExtMachInst machInst; bool unknown; const char *mnemonic; bool disabled; + ExceptionClass overrideEc; public: UndefinedInstruction(ExtMachInst _machInst, bool _unknown, const char *_mnemonic = NULL, bool _disabled = false) : - machInst(_machInst), unknown(_unknown), - mnemonic(_mnemonic), disabled(_disabled) - { - } - UndefinedInstruction() : - machInst(0), unknown(false), mnemonic("undefined"), disabled(false) + ArmFaultVals(_machInst), + unknown(_unknown), mnemonic(_mnemonic), disabled(_disabled), + overrideEc(EC_INVALID) + {} + UndefinedInstruction(ExtMachInst _machInst, uint32_t _iss, ExceptionClass _overrideEc) : + ArmFaultVals(_machInst, _iss), + overrideEc(_overrideEc) {} void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + bool routeToHyp(ThreadContext *tc) const; + ExceptionClass ec(ThreadContext *tc) const; + uint32_t iss() const; }; class SupervisorCall : public ArmFaultVals { protected: - ExtMachInst machInst; - + ExceptionClass overrideEc; public: - SupervisorCall(ExtMachInst _machInst) : machInst(_machInst) - {} - SupervisorCall() : machInst(0) + SupervisorCall(ExtMachInst _machInst, uint32_t _iss, + ExceptionClass _overrideEc = EC_INVALID) : + ArmFaultVals(_machInst, _iss), + overrideEc(_overrideEc) {} void invoke(ThreadContext *tc, StaticInstPtr inst = StaticInst::nullStaticInstPtr); + bool routeToHyp(ThreadContext *tc) const; + ExceptionClass ec(ThreadContext *tc) const; + uint32_t iss() const; +}; + +class SecureMonitorCall : public ArmFaultVals +{ + public: + SecureMonitorCall(ExtMachInst _machInst) : + ArmFaultVals(_machInst) + {} + + void invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + ExceptionClass ec(ThreadContext *tc) const; + uint32_t iss() const; +}; + +class SupervisorTrap : public ArmFaultVals +{ + protected: + ExtMachInst machInst; + ExceptionClass overrideEc; + + public: + SupervisorTrap(ExtMachInst _machInst, uint32_t _iss, + ExceptionClass _overrideEc = EC_INVALID) : + ArmFaultVals(_machInst, _iss), + overrideEc(_overrideEc) + {} + + ExceptionClass 
ec(ThreadContext *tc) const;
+};
+
+class SecureMonitorTrap : public ArmFaultVals<SecureMonitorTrap>
+{
+  protected:
+    ExtMachInst machInst;
+    ExceptionClass overrideEc;
+
+  public:
+    SecureMonitorTrap(ExtMachInst _machInst, uint32_t _iss,
+                      ExceptionClass _overrideEc = EC_INVALID) :
+        ArmFaultVals<SecureMonitorTrap>(_machInst, _iss),
+        overrideEc(_overrideEc)
+    {}
+
+    ExceptionClass ec(ThreadContext *tc) const;
+};
+
+class HypervisorCall : public ArmFaultVals<HypervisorCall>
+{
+  public:
+    HypervisorCall(ExtMachInst _machInst, uint32_t _imm);
+};
+
+class HypervisorTrap : public ArmFaultVals<HypervisorTrap>
+{
+  protected:
+    ExtMachInst machInst;
+    ExceptionClass overrideEc;
+
+  public:
+    HypervisorTrap(ExtMachInst _machInst, uint32_t _iss,
+                   ExceptionClass _overrideEc = EC_INVALID) :
+        ArmFaultVals<HypervisorTrap>(_machInst, _iss),
+        overrideEc(_overrideEc)
+    {}
+
+    ExceptionClass ec(ThreadContext *tc) const;
+};
 
 template <class T>
 class AbortFault : public ArmFaultVals<T>
 {
   protected:
+    /**
+     * The virtual address the fault occurred at. If 2 stages of
+     * translation are being used then this is the intermediate
+     * physical address that is the starting point for the second
+     * stage of translation.
+     */
+    Addr faultAddr;
+    /**
+     * Original virtual address. If the fault was generated on the
+     * second stage of translation then this variable stores the
+     * virtual address used in the original stage 1 translation.
+     */
+    Addr OVAddr;
     bool write;
-    uint8_t domain;
-    uint8_t status;
+    TlbEntry::DomainType domain;
+    uint8_t source;
+    uint8_t srcEncoded;
+    bool stage2;
+    bool s1ptw;
+    ArmFault::TranMethod tranMethod;
 
   public:
-    AbortFault(Addr _faultAddr, bool _write,
-               uint8_t _domain, uint8_t _status) :
-        faultAddr(_faultAddr), write(_write),
-        domain(_domain), status(_status)
+    AbortFault(Addr _faultAddr, bool _write, TlbEntry::DomainType _domain,
+               uint8_t _source, bool _stage2,
+               ArmFault::TranMethod _tranMethod = ArmFault::UnknownTran) :
+        faultAddr(_faultAddr), write(_write), domain(_domain),
+        source(_source), stage2(_stage2), s1ptw(false),
+        tranMethod(_tranMethod)
     {}
 
     void invoke(ThreadContext *tc,
                 StaticInstPtr inst = StaticInst::nullStaticInstPtr);
+
+    FSR getFsr(ThreadContext *tc);
+    bool abortDisable(ThreadContext *tc);
+    uint32_t iss() const;
+    bool isStage2() const { return stage2; }
+    void annotate(ArmFault::AnnotationIDs id, uint64_t val);
+    bool isMMUFault() const;
 };
 
 class PrefetchAbort : public AbortFault<PrefetchAbort>
 {
   public:
-    static const MiscRegIndex FsrIndex = MISCREG_IFSR;
-    static const MiscRegIndex FarIndex = MISCREG_IFAR;
+    static const MiscRegIndex FsrIndex  = MISCREG_IFSR;
+    static const MiscRegIndex FarIndex  = MISCREG_IFAR;
+    static const MiscRegIndex HFarIndex = MISCREG_HIFAR;
 
-    PrefetchAbort(Addr _addr, uint8_t _status) :
-        AbortFault<PrefetchAbort>(_addr, false, 0, _status)
+    PrefetchAbort(Addr _addr, uint8_t _source, bool _stage2 = false,
+                  ArmFault::TranMethod _tranMethod = ArmFault::UnknownTran) :
+        AbortFault<PrefetchAbort>(_addr, false,
+                                  TlbEntry::DomainType::NoAccess,
+                                  _source, _stage2, _tranMethod)
     {}
+
+    ExceptionClass ec(ThreadContext *tc) const;
+    // @todo: external aborts should be routed if SCR.EA == 1
+    bool routeToMonitor(ThreadContext *tc) const;
+    bool routeToHyp(ThreadContext *tc) const;
 };
 
 class DataAbort : public AbortFault<DataAbort>
 {
   public:
-    static const MiscRegIndex FsrIndex = MISCREG_DFSR;
-    static const MiscRegIndex FarIndex = MISCREG_DFAR;
+    static const MiscRegIndex FsrIndex  = MISCREG_DFSR;
+    static const MiscRegIndex FarIndex  = MISCREG_DFAR;
+    static const MiscRegIndex HFarIndex = MISCREG_HDFAR;
+    bool isv;
+    uint8_t sas;
+    uint8_t sse;
+    uint8_t srt;
 
-    DataAbort(Addr _addr, uint8_t
_domain, bool _write, uint8_t _status) : - AbortFault(_addr, _write, _domain, _status) + // AArch64 only + bool sf; + bool ar; + + DataAbort(Addr _addr, TlbEntry::DomainType _domain, bool _write, uint8_t _source, + bool _stage2 = false, ArmFault::TranMethod _tranMethod = ArmFault::UnknownTran) : + AbortFault(_addr, _write, _domain, _source, _stage2, + _tranMethod), + isv(false), sas (0), sse(0), srt(0), sf(false), ar(false) {} + + ExceptionClass ec(ThreadContext *tc) const; + // @todo: external aborts should be routed if SCR.EA == 1 + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; + uint32_t iss() const; + void annotate(AnnotationIDs id, uint64_t val); }; -class Interrupt : public ArmFaultVals {}; -class FastInterrupt : public ArmFaultVals {}; +class VirtualDataAbort : public AbortFault +{ + public: + static const MiscRegIndex FsrIndex = MISCREG_DFSR; + static const MiscRegIndex FarIndex = MISCREG_DFAR; + static const MiscRegIndex HFarIndex = MISCREG_HDFAR; + + VirtualDataAbort(Addr _addr, TlbEntry::DomainType _domain, bool _write, + uint8_t _source) : + AbortFault(_addr, _write, _domain, _source, false) + {} + + void invoke(ThreadContext *tc, StaticInstPtr inst); +}; + +class Interrupt : public ArmFaultVals +{ + public: + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; + bool abortDisable(ThreadContext *tc); +}; + +class VirtualInterrupt : public ArmFaultVals +{ + public: + VirtualInterrupt(); +}; + +class FastInterrupt : public ArmFaultVals +{ + public: + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; + bool abortDisable(ThreadContext *tc); + bool fiqDisable(ThreadContext *tc); +}; + +class VirtualFastInterrupt : public ArmFaultVals +{ + public: + VirtualFastInterrupt(); +}; + +/// PC alignment fault (AArch64 only) +class PCAlignmentFault : public ArmFaultVals +{ + protected: + /// The unaligned value of the PC + Addr faultPC; + public: + PCAlignmentFault(Addr _faultPC) : faultPC(_faultPC) + {} + void invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); +}; + +/// Stack pointer alignment fault (AArch64 only) +class SPAlignmentFault : public ArmFaultVals +{ + public: + SPAlignmentFault(); +}; + +/// System error (AArch64 only) +class SystemError : public ArmFaultVals +{ + public: + SystemError(); + void invoke(ThreadContext *tc, + StaticInstPtr inst = StaticInst::nullStaticInstPtr); + bool routeToMonitor(ThreadContext *tc) const; + bool routeToHyp(ThreadContext *tc) const; +}; // A fault that flushes the pipe, excluding the faulting instructions class FlushPipe : public ArmFaultVals @@ -246,6 +552,13 @@ class ArmSev : public ArmFaultVals StaticInstPtr inst = StaticInst::nullStaticInstPtr); }; +/// Illegal Instruction Set State fault (AArch64 only) +class IllegalInstSetStateFault : public ArmFaultVals +{ + public: + IllegalInstSetStateFault(); +}; + } // namespace ArmISA #endif // __ARM_FAULTS_HH__ diff --git a/src/arch/arm/insts/branch64.cc b/src/arch/arm/insts/branch64.cc new file mode 100644 index 000000000..49ba3402a --- /dev/null +++ b/src/arch/arm/insts/branch64.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the 
software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/branch64.hh" + +namespace ArmISA +{ + +ArmISA::PCState +BranchImm64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm); + pcs.advance(); + return pcs; +} + +ArmISA::PCState +BranchImmReg64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm); + pcs.advance(); + return pcs; +} + +ArmISA::PCState +BranchImmImmReg64::branchTarget(const ArmISA::PCState &branchPC) const +{ + ArmISA::PCState pcs = branchPC; + pcs.instNPC(pcs.pc() + imm2); + pcs.advance(); + return pcs; +} + +std::string +BranchImmCond64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false, true, condCode); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchImm64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + return ss.str(); +} + +std::string +BranchRet64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + if (op1 != INTREG_X30) + printReg(ss, op1); + return ss.str(); +} + +std::string +BranchEret64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + return ss.str(); +} + +std::string +BranchImmReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printTarget(ss, pc + imm, symtab); + return ss.str(); +} + +std::string +BranchImmImmReg64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", #%#x, ", imm1); + printTarget(ss, pc + imm2, symtab); + return ss.str(); +} + +} // namespace ArmISA diff --git a/src/arch/arm/insts/branch64.hh b/src/arch/arm/insts/branch64.hh new file mode 100644 index 000000000..48881e0c2 --- /dev/null +++ b/src/arch/arm/insts/branch64.hh @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_INSTS_BRANCH64_HH__ +#define __ARCH_ARM_INSTS_BRANCH64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +namespace ArmISA +{ +// Branch to a target computed with an immediate +class BranchImm64 : public ArmStaticInst +{ + protected: + int64_t imm; + + public: + BranchImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), imm(_imm) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Conditionally Branch to a target computed with an immediate +class BranchImmCond64 : public BranchImm64 +{ + protected: + ConditionCode condCode; + + public: + BranchImmCond64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm, ConditionCode _condCode) : + BranchImm64(mnem, _machInst, __opClass, _imm), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with a register +class BranchReg64 : public ArmStaticInst +{ + protected: + IntRegIndex op1; + + public: + BranchReg64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), op1(_op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Ret instruction +class BranchRet64 : public BranchReg64 +{ + public: + BranchRet64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1) : + BranchReg64(mnem, _machInst, __opClass, _op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Eret instruction +class BranchEret64 : public ArmStaticInst +{ + public: + BranchEret64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : + ArmStaticInst(mnem, _machInst, __opClass) + 
{} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with an immediate and a register +class BranchImmReg64 : public ArmStaticInst +{ + protected: + int64_t imm; + IntRegIndex op1; + + public: + BranchImmReg64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + int64_t _imm, IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), imm(_imm), op1(_op1) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +// Branch to a target computed with two immediates +class BranchImmImmReg64 : public ArmStaticInst +{ + protected: + int64_t imm1; + int64_t imm2; + IntRegIndex op1; + + public: + BranchImmImmReg64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, int64_t _imm1, int64_t _imm2, + IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), + imm1(_imm1), imm2(_imm2), op1(_op1) + {} + + ArmISA::PCState branchTarget(const ArmISA::PCState &branchPC) const; + + /// Explicitly import the otherwise hidden branchTarget + using StaticInst::branchTarget; + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +} + +#endif //__ARCH_ARM_INSTS_BRANCH_HH__ diff --git a/src/arch/arm/insts/data64.cc b/src/arch/arm/insts/data64.cc new file mode 100644 index 000000000..f65219870 --- /dev/null +++ b/src/arch/arm/insts/data64.cc @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/data64.hh" + +namespace ArmISA +{ + +std::string +DataXImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, true, false, /*XXX not really s*/ false, dest, op1, + INTREG_ZERO, INTREG_ZERO, 0, LSL, imm); + return ss.str(); +} + +std::string +DataXImmOnlyOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataXSRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, false, true, /*XXX not really s*/ false, dest, op1, + op2, INTREG_ZERO, shiftAmt, shiftType, 0); + return ss.str(); +} + +std::string +DataXERegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printDataInst(ss, false, true, /*XXX not really s*/ false, dest, op1, + op2, INTREG_ZERO, shiftAmt, LSL, 0); + return ss.str(); +} + +std::string +DataX1RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + return ss.str(); +} + +std::string +DataX1RegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataX1Reg2ImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + +std::string +DataX2RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + return ss.str(); +} + +std::string +DataX2RegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +DataX3RegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printReg(ss, op3); + return ss.str(); +} + +std::string +DataXCondCompImmOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm, defCc); 
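+    // defCc is the NZCV immediate that a conditional compare writes to the
+    // flags when its condition check fails, so the printed shape follows
+    // the A64 CCMP/CCMN operand order: register, immediate, nzcv, cond.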
+ ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +DataXCondCompRegOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", defCc); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +DataXCondSelOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +} diff --git a/src/arch/arm/insts/data64.hh b/src/arch/arm/insts/data64.hh new file mode 100644 index 000000000..8c0677b3d --- /dev/null +++ b/src/arch/arm/insts/data64.hh @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_INSTS_DATA64_HH__ +#define __ARCH_ARM_INSTS_DATA64_HH__ + +#include "arch/arm/insts/static_inst.hh" +#include "base/trace.hh" + +namespace ArmISA +{ + +class DataXImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm; + + DataXImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXImmOnlyOp : public ArmStaticInst +{ + protected: + IntRegIndex dest; + uint64_t imm; + + DataXImmOnlyOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXSRegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + int32_t shiftAmt; + ArmShiftType shiftType; + + DataXSRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + int32_t _shiftAmt, ArmShiftType _shiftType) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), + shiftAmt(_shiftAmt), shiftType(_shiftType) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXERegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + ArmExtendType extendType; + int32_t shiftAmt; + + DataXERegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ArmExtendType _extendType, int32_t _shiftAmt) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), + extendType(_extendType), shiftAmt(_shiftAmt) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + + DataX1RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1RegImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm; + + DataX1RegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1), + imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX1Reg2ImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1; + uint64_t imm1, imm2; + + DataX1Reg2ImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm1, + uint64_t _imm2) : + ArmStaticInst(mnem, _machInst, __opClass), dest(_dest), op1(_op1), + imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX2RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + + DataX2RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), 
op1(_op1), op2(_op2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX2RegImmOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + uint64_t imm; + + DataX2RegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataX3RegOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2, op3; + + DataX3RegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), op3(_op3) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondCompImmOp : public ArmStaticInst +{ + protected: + IntRegIndex op1; + uint64_t imm; + ConditionCode condCode; + uint8_t defCc; + + DataXCondCompImmOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, uint64_t _imm, + ConditionCode _condCode, uint8_t _defCc) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), imm(_imm), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondCompRegOp : public ArmStaticInst +{ + protected: + IntRegIndex op1, op2; + ConditionCode condCode; + uint8_t defCc; + + DataXCondCompRegOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode, uint8_t _defCc) : + ArmStaticInst(mnem, _machInst, __opClass), + op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class DataXCondSelOp : public ArmStaticInst +{ + protected: + IntRegIndex dest, op1, op2; + ConditionCode condCode; + + DataXCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +} + +#endif //__ARCH_ARM_INSTS_PREDINST_HH__ diff --git a/src/arch/arm/insts/fplib.cc b/src/arch/arm/insts/fplib.cc new file mode 100644 index 000000000..1f44eed09 --- /dev/null +++ b/src/arch/arm/insts/fplib.cc @@ -0,0 +1,3086 @@ +/* +* Copyright (c) 2012-2013 ARM Limited +* All rights reserved +* +* The license below extends only to copyright in the software and shall +* not be construed as granting a license to any other intellectual +* property including but not limited to intellectual property relating +* to a hardware implementation of the functionality of the software +* licensed hereunder. You may use the software subject to the license +* terms below provided that you ensure that this notice is replicated +* unmodified and in its entirety in all distributions of the software, +* modified or unmodified, in source code or in binary form. 
+* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer; +* redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution; +* neither the name of the copyright holders nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* Authors: Edmund Grimley Evans +* Thomas Grocutt +*/ + +#include + +#include + +#include "fplib.hh" + +namespace ArmISA +{ + +#define FPLIB_RN 0 +#define FPLIB_RP 1 +#define FPLIB_RM 2 +#define FPLIB_RZ 3 +#define FPLIB_FZ 4 +#define FPLIB_DN 8 +#define FPLIB_AHP 16 + +#define FPLIB_IDC 128 // Input Denormal +#define FPLIB_IXC 16 // Inexact +#define FPLIB_UFC 8 // Underflow +#define FPLIB_OFC 4 // Overflow +#define FPLIB_DZC 2 // Division by Zero +#define FPLIB_IOC 1 // Invalid Operation + +static inline uint16_t +lsl16(uint16_t x, uint32_t shift) +{ + return shift < 16 ? x << shift : 0; +} + +static inline uint16_t +lsr16(uint16_t x, uint32_t shift) +{ + return shift < 16 ? x >> shift : 0; +} + +static inline uint32_t +lsl32(uint32_t x, uint32_t shift) +{ + return shift < 32 ? x << shift : 0; +} + +static inline uint32_t +lsr32(uint32_t x, uint32_t shift) +{ + return shift < 32 ? x >> shift : 0; +} + +static inline uint64_t +lsl64(uint64_t x, uint32_t shift) +{ + return shift < 64 ? x << shift : 0; +} + +static inline uint64_t +lsr64(uint64_t x, uint32_t shift) +{ + return shift < 64 ? 
x >> shift : 0; +} + +static inline void +lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) +{ + if (shift < 64) { + *r1 = x1 << shift | x0 >> (64 - shift); + *r0 = x0 << shift; + } else if (shift < 128) { + *r1 = x0 << (shift - 64); + *r0 = 0; + } else { + *r1 = 0; + *r0 = 0; + } +} + +static inline void +lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift) +{ + if (shift < 64) { + *r0 = x0 >> shift | x1 << (64 - shift); + *r1 = x1 >> shift; + } else if (shift < 128) { + *r0 = x1 >> (shift - 64); + *r1 = 0; + } else { + *r0 = 0; + *r1 = 0; + } +} + +static inline void +mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) +{ + uint32_t mask = ((uint32_t)1 << 31) - 1; + uint64_t a0 = a & mask; + uint64_t a1 = a >> 31 & mask; + uint64_t b0 = b & mask; + uint64_t b1 = b >> 31 & mask; + uint64_t p0 = a0 * b0; + uint64_t p2 = a1 * b1; + uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2; + uint64_t s0 = p0; + uint64_t s1 = (s0 >> 31) + p1; + uint64_t s2 = (s1 >> 31) + p2; + *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62; + *x1 = s2 >> 2; +} + +static inline +void mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b) +{ + uint64_t t0 = (uint64_t)(uint32_t)a * b; + uint64_t t1 = (t0 >> 32) + (a >> 32) * b; + *x0 = t1 << 32 | (uint32_t)t0; + *x1 = t1 >> 32; +} + +static inline void +mul64x64(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b) +{ + uint64_t a0 = (uint32_t)a; + uint64_t a1 = a >> 32; + uint64_t b0 = (uint32_t)b; + uint64_t b1 = b >> 32; + uint64_t t1 = (a0 * b0 >> 32) + a1 * b0; + uint64_t t2 = a0 * b1; + uint64_t x = ((uint64_t)(uint32_t)t1 + (uint32_t)t2) >> 32; + x += t1 >> 32; + x += t2 >> 32; + x += a1 * b1; + *x0 = a * b; + *x1 = x; +} + +static inline void +add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, + uint64_t b1) +{ + *x0 = a0 + b0; + *x1 = a1 + b1 + (*x0 < a0); +} + +static inline void +sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0, + uint64_t b1) +{ + *x0 = a0 - b0; + *x1 = a1 - b1 - (*x0 > a0); +} + +static inline int +cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) +{ + return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 
1 : 0); +} + +static inline uint16_t +fp16_normalise(uint16_t mnt, int *exp) +{ + int shift; + + if (!mnt) { + return 0; + } + + for (shift = 8; shift; shift >>= 1) { + if (!(mnt >> (16 - shift))) { + mnt <<= shift; + *exp -= shift; + } + } + return mnt; +} + +static inline uint32_t +fp32_normalise(uint32_t mnt, int *exp) +{ + int shift; + + if (!mnt) { + return 0; + } + + for (shift = 16; shift; shift >>= 1) { + if (!(mnt >> (32 - shift))) { + mnt <<= shift; + *exp -= shift; + } + } + return mnt; +} + +static inline uint64_t +fp64_normalise(uint64_t mnt, int *exp) +{ + int shift; + + if (!mnt) { + return 0; + } + + for (shift = 32; shift; shift >>= 1) { + if (!(mnt >> (64 - shift))) { + mnt <<= shift; + *exp -= shift; + } + } + return mnt; +} + +static inline void +fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp) +{ + uint64_t x0 = *mnt0; + uint64_t x1 = *mnt1; + int shift; + + if (!x0 && !x1) { + return; + } + + if (!x1) { + x1 = x0; + x0 = 0; + *exp -= 64; + } + + for (shift = 32; shift; shift >>= 1) { + if (!(x1 >> (64 - shift))) { + x1 = x1 << shift | x0 >> (64 - shift); + x0 <<= shift; + *exp -= shift; + } + } + + *mnt0 = x0; + *mnt1 = x1; +} + +static inline uint16_t +fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt) +{ + return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1)); +} + +static inline uint32_t +fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt) +{ + return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1)); +} + +static inline uint64_t +fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt) +{ + return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1)); +} + +static inline uint16_t +fp16_zero(int sgn) +{ + return fp16_pack(sgn, 0, 0); +} + +static inline uint32_t +fp32_zero(int sgn) +{ + return fp32_pack(sgn, 0, 0); +} + +static inline uint64_t +fp64_zero(int sgn) +{ + return fp64_pack(sgn, 0, 0); +} + +static inline uint16_t +fp16_max_normal(int sgn) +{ + return fp16_pack(sgn, 30, -1); +} + +static inline uint32_t +fp32_max_normal(int sgn) +{ + return fp32_pack(sgn, 254, -1); +} + +static inline uint64_t +fp64_max_normal(int sgn) +{ + return fp64_pack(sgn, 2046, -1); +} + +static inline uint16_t +fp16_infinity(int sgn) +{ + return fp16_pack(sgn, 31, 0); +} + +static inline uint32_t +fp32_infinity(int sgn) +{ + return fp32_pack(sgn, 255, 0); +} + +static inline uint64_t +fp64_infinity(int sgn) +{ + return fp64_pack(sgn, 2047, 0); +} + +static inline uint16_t +fp16_defaultNaN() +{ + return fp16_pack(0, 31, (uint16_t)1 << 9); +} + +static inline uint32_t +fp32_defaultNaN() +{ + return fp32_pack(0, 255, (uint32_t)1 << 22); +} + +static inline uint64_t +fp64_defaultNaN() +{ + return fp64_pack(0, 2047, (uint64_t)1 << 51); +} + +static inline void +fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode, + int *flags) +{ + *sgn = x >> 15; + *exp = x >> 10 & 31; + *mnt = x & (((uint16_t)1 << 10) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint16_t)1 << 10; + } else { + ++*exp; + // There is no flush to zero in this case! 
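+        // (Unlike fp32_unpack and fp64_unpack below, no FPLIB_FZ input
+        // flush is applied to half-precision operands here.)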
+ } +} + +static inline void +fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode, + int *flags) +{ + *sgn = x >> 31; + *exp = x >> 23 & 255; + *mnt = x & (((uint32_t)1 << 23) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint32_t)1 << 23; + } else { + ++*exp; + if ((mode & FPLIB_FZ) && *mnt) { + *flags |= FPLIB_IDC; + *mnt = 0; + } + } +} + +static inline void +fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode, + int *flags) +{ + *sgn = x >> 63; + *exp = x >> 52 & 2047; + *mnt = x & (((uint64_t)1 << 52) - 1); + + // Handle subnormals: + if (*exp) { + *mnt |= (uint64_t)1 << 52; + } else { + ++*exp; + if ((mode & FPLIB_FZ) && *mnt) { + *flags |= FPLIB_IDC; + *mnt = 0; + } + } +} + +static inline uint32_t +fp32_process_NaN(uint32_t a, int mode, int *flags) +{ + if (!(a >> 22 & 1)) { + *flags |= FPLIB_IOC; + a |= (uint32_t)1 << 22; + } + return mode & FPLIB_DN ? fp32_defaultNaN() : a; +} + +static inline uint64_t +fp64_process_NaN(uint64_t a, int mode, int *flags) +{ + if (!(a >> 51 & 1)) { + *flags |= FPLIB_IOC; + a |= (uint64_t)1 << 51; + } + return mode & FPLIB_DN ? fp64_defaultNaN() : a; +} + +static uint32_t +fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_exp = a >> 23 & 255; + uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); + int b_exp = b >> 23 & 255; + uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); + + // Handle signalling NaNs: + if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) + return fp32_process_NaN(b, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 255 && a_mnt) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt) + return fp32_process_NaN(b, mode, flags); + + return 0; +} + +static uint64_t +fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_exp = a >> 52 & 2047; + uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); + int b_exp = b >> 52 & 2047; + uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); + + // Handle signalling NaNs: + if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) + return fp64_process_NaN(b, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 2047 && a_mnt) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt) + return fp64_process_NaN(b, mode, flags); + + return 0; +} + +static uint32_t +fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags) +{ + int a_exp = a >> 23 & 255; + uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1); + int b_exp = b >> 23 & 255; + uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1); + int c_exp = c >> 23 & 255; + uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1); + + // Handle signalling NaNs: + if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1)) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1)) + return fp32_process_NaN(b, mode, flags); + if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1)) + return fp32_process_NaN(c, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 255 && a_mnt) + return fp32_process_NaN(a, mode, flags); + if (b_exp == 255 && b_mnt) + return fp32_process_NaN(b, mode, flags); + if (c_exp == 255 && c_mnt) + return fp32_process_NaN(c, mode, flags); + + return 0; +} + +static uint64_t +fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags) +{ + int a_exp = a >> 52 & 2047; + uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1); + 
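+    // An operand is a NaN when its exponent field is all ones and its
+    // mantissa is non-zero; a clear top mantissa bit marks it signalling.
+    // Signalling NaNs are handled before quiet ones, a before b before c.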
int b_exp = b >> 52 & 2047; + uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1); + int c_exp = c >> 52 & 2047; + uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1); + + // Handle signalling NaNs: + if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1)) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1)) + return fp64_process_NaN(b, mode, flags); + if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1)) + return fp64_process_NaN(c, mode, flags); + + // Handle quiet NaNs: + if (a_exp == 2047 && a_mnt) + return fp64_process_NaN(a, mode, flags); + if (b_exp == 2047 && b_mnt) + return fp64_process_NaN(b, mode, flags); + if (c_exp == 2047 && c_mnt) + return fp64_process_NaN(c, mode, flags); + + return 0; +} + +static uint16_t +fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint16_t int_mant; // mantissa for result, less than (1 << 11) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // There is no flush to zero in this case! + + // The bottom 5 bits of mnt are orred together: + mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr16(mnt, 3 - exp); + error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { + ++int_mant; + if (int_mant == (uint32_t)1 << 10) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint32_t)1 << 11) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (!(mode & FPLIB_AHP)) { + if (biased_exp >= 31) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp16_infinity(sgn); + } else { + return fp16_max_normal(sgn); + } + } + } else { + if (biased_exp >= 32) { + *flags |= FPLIB_IOC; + return fp16_pack(sgn, 31, -1); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp16_pack(sgn, biased_exp, int_mant); +} + +static uint32_t +fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint32_t int_mant; // mantissa for result, less than (1 << 24) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // Flush to zero: + if ((mode & FPLIB_FZ) && exp < 1) { + *flags |= FPLIB_UFC; + return fp32_zero(sgn); + } + + // The bottom 8 bits of mnt are orred together: + mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr32(mnt, 3 - exp); + error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && 
sgn)) && error)) { + ++int_mant; + if (int_mant == (uint32_t)1 << 23) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint32_t)1 << 24) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (biased_exp >= 255) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp32_infinity(sgn); + } else { + return fp32_max_normal(sgn); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp32_pack(sgn, biased_exp, int_mant); +} + +static uint32_t +fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags) +{ + return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags); +} + +static uint64_t +fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags) +{ + int biased_exp; // non-negative exponent value for result + uint64_t int_mant; // mantissa for result, less than (1 << 52) + int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5 + + assert(rm != FPRounding_TIEAWAY); + + // Flush to zero: + if ((mode & FPLIB_FZ) && exp < 1) { + *flags |= FPLIB_UFC; + return fp64_zero(sgn); + } + + // The bottom 11 bits of mnt are orred together: + mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0); + + if (exp > 0) { + biased_exp = exp; + int_mant = mnt >> 2; + error = mnt & 3; + } else { + biased_exp = 0; + int_mant = lsr64(mnt, 3 - exp); + error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1)); + } + + if (!biased_exp && error) { // xx should also check fpscr_val<11> + *flags |= FPLIB_UFC; + } + + // Round up: + if ((rm == FPLIB_RN && (error == 3 || + (error == 2 && (int_mant & 1)))) || + (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) { + ++int_mant; + if (int_mant == (uint64_t)1 << 52) { + // Rounded up from denormalized to normalized + biased_exp = 1; + } + if (int_mant == (uint64_t)1 << 53) { + // Rounded up to next exponent + ++biased_exp; + int_mant >>= 1; + } + } + + // Handle rounding to odd aka Von Neumann rounding: + if (error && rm == FPRounding_ODD) + int_mant |= 1; + + // Handle overflow: + if (biased_exp >= 2047) { + *flags |= FPLIB_OFC | FPLIB_IXC; + if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) || + (rm == FPLIB_RM && sgn)) { + return fp64_infinity(sgn); + } else { + return fp64_max_normal(sgn); + } + } + + if (error) { + *flags |= FPLIB_IXC; + } + + return fp64_pack(sgn, biased_exp, int_mant); +} + +static uint64_t +fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags) +{ + return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags); +} + +static int +fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1))) + *flags |= FPLIB_IOC; + return 0; + } + return a == b || (!a_mnt && !b_mnt); +} + +static int +fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, 
flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 1; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 1; +} + +static int +fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint32_t a_mnt, b_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) || + (b_exp == 255 && (uint32_t)(b_mnt << 9))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 0; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 0; +} + +static int +fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1))) + *flags |= FPLIB_IOC; + return 0; + } + return a == b || (!a_mnt && !b_mnt); +} + +static int +fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 1; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 1; +} + +static int +fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp; + uint64_t a_mnt, b_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) || + (b_exp == 2047 && (uint64_t)(b_mnt << 12))) { + *flags |= FPLIB_IOC; + return 0; + } + if (!a_mnt && !b_mnt) + return 0; + if (a_sgn != b_sgn) + return b_sgn; + if (a_exp != b_exp) + return a_sgn ^ (a_exp > b_exp); + if (a_mnt != b_mnt) + return a_sgn ^ (a_mnt > b_mnt); + return 0; +} + +static uint32_t +fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x, x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) { + return x; + } + + b_sgn ^= neg; + + // Handle infinities and zeroes: + if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } else if (a_exp == 255) { + return fp32_infinity(a_sgn); + } else if (b_exp == 255) { + return fp32_infinity(b_sgn); + } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { + return fp32_zero(a_sgn); + } + + a_mnt <<= 3; + b_mnt <<= 3; + if (a_exp >= b_exp) { + b_mnt = (lsr32(b_mnt, a_exp - b_exp) | + !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1))); + 
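+ // (the bits shifted out of the smaller operand are ORed into its
+ // least significant bit, a "sticky" bit, so an inexact alignment
+ // still shows up at rounding: e.g. 0b10001 aligned right by 3
+ // becomes 0b11 rather than 0b10)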
b_exp = a_exp; + } else { + a_mnt = (lsr32(a_mnt, b_exp - a_exp) | + !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1))); + a_exp = b_exp; + } + x_sgn = a_sgn; + x_exp = a_exp; + if (a_sgn == b_sgn) { + x_mnt = a_mnt + b_mnt; + } else if (a_mnt >= b_mnt) { + x_mnt = a_mnt - b_mnt; + } else { + x_sgn ^= 1; + x_mnt = b_mnt - a_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp32_zero((mode & 3) == 2); + } + + x_mnt = fp32_normalise(x_mnt, &x_exp); + + return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags); +} + +static uint64_t +fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint64_t a_mnt, b_mnt, x, x_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) { + return x; + } + + b_sgn ^= neg; + + // Handle infinities and zeroes: + if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } else if (a_exp == 2047) { + return fp64_infinity(a_sgn); + } else if (b_exp == 2047) { + return fp64_infinity(b_sgn); + } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) { + return fp64_zero(a_sgn); + } + + a_mnt <<= 3; + b_mnt <<= 3; + if (a_exp >= b_exp) { + b_mnt = (lsr64(b_mnt, a_exp - b_exp) | + !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1))); + b_exp = a_exp; + } else { + a_mnt = (lsr64(a_mnt, b_exp - a_exp) | + !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1))); + a_exp = b_exp; + } + x_sgn = a_sgn; + x_exp = a_exp; + if (a_sgn == b_sgn) { + x_mnt = a_mnt + b_mnt; + } else if (a_mnt >= b_mnt) { + x_mnt = a_mnt - b_mnt; + } else { + x_sgn ^= 1; + x_mnt = b_mnt - a_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp64_zero((mode & 3) == 2); + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags); +} + +static uint32_t +fp32_mul(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x; + uint64_t x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) { + return x; + } + + // Handle infinities and zeroes: + if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } else if (a_exp == 255 || b_exp == 255) { + return fp32_infinity(a_sgn ^ b_sgn); + } else if (!a_mnt || !b_mnt) { + return fp32_zero(a_sgn ^ b_sgn); + } + + // Multiply and normalise: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp + b_exp - 110; + x_mnt = (uint64_t)a_mnt * b_mnt; + x_mnt = fp64_normalise(x_mnt, &x_exp); + + // Convert to 32 bits, collapsing error into bottom bit: + x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_mul(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint64_t a_mnt, b_mnt, x; + uint64_t x0_mnt, x1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) { + return x; + } + + // Handle infinities and zeroes: + if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } else if (a_exp == 2047 || b_exp == 2047) { + return 
fp64_infinity(a_sgn ^ b_sgn); + } else if (!a_mnt || !b_mnt) { + return fp64_zero(a_sgn ^ b_sgn); + } + + // Multiply and normalise: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp + b_exp - 1000; + mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt); + fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); + + // Convert to 64 bits, collapsing error into bottom bit: + x0_mnt = x1_mnt << 1 | !!x0_mnt; + + return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags); +} + +static uint32_t +fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale, + int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; + uint32_t a_mnt, b_mnt, c_mnt, x; + uint64_t x_mnt, y_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); + + x = fp32_process_NaNs3(a, b, c, mode, flags); + + // Quiet NaN added to product of zero and infinity: + if (a_exp == 255 && (a_mnt >> 22 & 1) && + ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) || + (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) { + x = fp32_defaultNaN(); + *flags |= FPLIB_IOC; + } + + if (x) { + return x; + } + + // Handle infinities and zeroes: + if ((b_exp == 255 && !c_mnt) || + (c_exp == 255 && !b_mnt) || + (a_exp == 255 && (b_exp == 255 || c_exp == 255) && + (a_sgn != (b_sgn ^ c_sgn)))) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + if (a_exp == 255) + return fp32_infinity(a_sgn); + if (b_exp == 255 || c_exp == 255) + return fp32_infinity(b_sgn ^ c_sgn); + if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) + return fp32_zero(a_sgn); + + x_sgn = a_sgn; + x_exp = a_exp + 13; + x_mnt = (uint64_t)a_mnt << 27; + + // Multiply: + y_sgn = b_sgn ^ c_sgn; + y_exp = b_exp + c_exp - 113; + y_mnt = (uint64_t)b_mnt * c_mnt << 3; + if (!y_mnt) { + y_exp = x_exp; + } + + // Add: + if (x_exp >= y_exp) { + y_mnt = (lsr64(y_mnt, x_exp - y_exp) | + !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1))); + y_exp = x_exp; + } else { + x_mnt = (lsr64(x_mnt, y_exp - x_exp) | + !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1))); + x_exp = y_exp; + } + if (x_sgn == y_sgn) { + x_mnt = x_mnt + y_mnt; + } else if (x_mnt >= y_mnt) { + x_mnt = x_mnt - y_mnt; + } else { + x_sgn ^= 1; + x_mnt = y_mnt - x_mnt; + } + + if (!x_mnt) { + // Sign of exact zero result depends on rounding mode + return fp32_zero((mode & 3) == 2); + } + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags); +} + +static uint64_t +fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale, + int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp; + uint64_t a_mnt, b_mnt, c_mnt, x; + uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags); + + x = fp64_process_NaNs3(a, b, c, mode, flags); + + // Quiet NaN added to product of zero and infinity: + if (a_exp == 2047 && (a_mnt >> 51 & 1) && + ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) || + (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) { + x = fp64_defaultNaN(); + *flags |= FPLIB_IOC; + } + + if (x) { + return x; + } + + // Handle infinities and zeroes: + if ((b_exp == 2047 && !c_mnt) || + (c_exp == 2047 && !b_mnt) || + (a_exp == 2047 && (b_exp == 
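+ // (a product of zero and infinity, or an infinite addend opposing
+ // an infinite product, is an Invalid Operation yielding the
+ // default NaN)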
2047 || c_exp == 2047) && + (a_sgn != (b_sgn ^ c_sgn)))) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + if (a_exp == 2047) + return fp64_infinity(a_sgn); + if (b_exp == 2047 || c_exp == 2047) + return fp64_infinity(b_sgn ^ c_sgn); + if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn)) + return fp64_zero(a_sgn); + + x_sgn = a_sgn; + x_exp = a_exp + 11; + x0_mnt = 0; + x1_mnt = a_mnt; + + // Multiply: + y_sgn = b_sgn ^ c_sgn; + y_exp = b_exp + c_exp - 1003; + mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3); + if (!y0_mnt && !y1_mnt) { + y_exp = x_exp; + } + + // Add: + if (x_exp >= y_exp) { + uint64_t t0, t1; + lsl128(&t0, &t1, y0_mnt, y1_mnt, + x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0); + lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp); + y0_mnt |= !!(t0 | t1); + y_exp = x_exp; + } else { + uint64_t t0, t1; + lsl128(&t0, &t1, x0_mnt, x1_mnt, + y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp); + x0_mnt |= !!(t0 | t1); + x_exp = y_exp; + } + if (x_sgn == y_sgn) { + add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); + } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) { + sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt); + } else { + x_sgn ^= 1; + sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt); + } + + if (!x0_mnt && !x1_mnt) { + // Sign of exact zero result depends on rounding mode + return fp64_zero((mode & 3) == 2); + } + + // Normalise and convert to 64 bits, collapsing error into bottom bit: + fp128_normalise(&x0_mnt, &x1_mnt, &x_exp); + x0_mnt = x1_mnt << 1 | !!x0_mnt; + + return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags); +} + +static uint32_t +fp32_div(uint32_t a, uint32_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp; + uint32_t a_mnt, b_mnt, x; + uint64_t x_mnt; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp32_process_NaNs(a, b, mode, flags))) + return x; + + // Handle infinities and zeroes: + if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + if (a_exp == 255 || !b_mnt) { + if (a_exp != 255) + *flags |= FPLIB_DZC; + return fp32_infinity(a_sgn ^ b_sgn); + } + if (!a_mnt || b_exp == 255) + return fp32_zero(a_sgn ^ b_sgn); + + // Divide, setting bottom bit if inexact: + a_mnt = fp32_normalise(a_mnt, &a_exp); + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp - b_exp + 172; + x_mnt = ((uint64_t)a_mnt << 18) / b_mnt; + x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18); + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_div(uint64_t a, uint64_t b, int mode, int *flags) +{ + int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c; + uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags); + + if ((x = fp64_process_NaNs(a, b, mode, flags))) + return x; + + // Handle infinities and zeroes: + if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + if (a_exp == 2047 || !b_mnt) { + if (a_exp != 2047) + *flags |= FPLIB_DZC; + return fp64_infinity(a_sgn ^ b_sgn); + } + if (!a_mnt || b_exp == 2047) + return fp64_zero(a_sgn ^ b_sgn); + + // Find 
reciprocal of divisor with Newton-Raphson: + a_mnt = fp64_normalise(a_mnt, &a_exp); + b_mnt = fp64_normalise(b_mnt, &b_exp); + x_mnt = ~(uint64_t)0 / (b_mnt >> 31); + mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt); + sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32); + mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt); + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33); + + // Multiply by dividend: + x_sgn = a_sgn ^ b_sgn; + x_exp = a_exp - b_exp + 1031; + mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough + lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4); + x_mnt = x1_mnt; + + // This is an underestimate, so try adding one: + mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough + c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11); + if (c <= 0) { + ++x_mnt; + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); +} + +static void +set_fpscr0(FPSCR &fpscr, int flags) +{ + if (flags & FPLIB_IDC) { + fpscr.idc = 1; + } + if (flags & FPLIB_IOC) { + fpscr.ioc = 1; + } + if (flags & FPLIB_DZC) { + fpscr.dzc = 1; + } + if (flags & FPLIB_OFC) { + fpscr.ofc = 1; + } + if (flags & FPLIB_UFC) { + fpscr.ufc = 1; + } + if (flags & FPLIB_IXC) { + fpscr.ixc = 1; + } +} + +static uint32_t +fp32_sqrt(uint32_t a, int mode, int *flags) +{ + int a_sgn, a_exp, x_sgn, x_exp; + uint32_t a_mnt, x, x_mnt; + uint64_t t0, t1; + + fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + + // Handle NaNs: + if (a_exp == 255 && (uint32_t)(a_mnt << 9)) + return fp32_process_NaN(a, mode, flags); + + // Handle infinities and zeroes: + if (!a_mnt) { + return fp32_zero(a_sgn); + } + if (a_exp == 255 && !a_sgn) { + return fp32_infinity(a_sgn); + } + if (a_sgn) { + *flags |= FPLIB_IOC; + return fp32_defaultNaN(); + } + + a_mnt = fp32_normalise(a_mnt, &a_exp); + if (!(a_exp & 1)) { + ++a_exp; + a_mnt >>= 1; + } + + // x = (a * 3 + 5) / 8 + x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28); + + // x = (a / x + x) / 2; // 16-bit accuracy + x = (a_mnt / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 16-bit accuracy + x = (a_mnt / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 32-bit accuracy + x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1); + + x_sgn = 0; + x_exp = (a_exp + 147) >> 1; + x_mnt = ((x - (1 << 5)) >> 6) + 1; + t1 = (uint64_t)x_mnt * x_mnt; + t0 = (uint64_t)a_mnt << 19; + if (t1 > t0) { + --x_mnt; + } + + x_mnt = fp32_normalise(x_mnt, &x_exp); + + return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags); +} + +static uint64_t +fp64_sqrt(uint64_t a, int mode, int *flags) +{ + int a_sgn, a_exp, x_sgn, x_exp, c; + uint64_t a_mnt, x_mnt, r, x0, x1; + uint32_t x; + + fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags); + + // Handle NaNs: + if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) { + return fp64_process_NaN(a, mode, flags); + } + + // Handle infinities and zeroes: + if (!a_mnt) + return fp64_zero(a_sgn); + if (a_exp == 2047 && !a_sgn) + return fp64_infinity(a_sgn); + if (a_sgn) { + *flags |= FPLIB_IOC; + return fp64_defaultNaN(); + } + + a_mnt = fp64_normalise(a_mnt, &a_exp); + if (a_exp & 1) { + ++a_exp; + a_mnt >>= 1; + } + + // x = (a * 3 + 5) / 8 + x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28); + + // x = (a / x + x) / 2; // 16-bit accuracy + x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 16-bit accuracy + x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15; + + // x = (a / x + x) / 2; // 32-bit accuracy + x = 
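+ // (Heron's method: each (a/x + x)/2 step roughly doubles the
+ // number of correct bits, so two 16-bit steps and one 32-bit step
+ // are enough to seed the 64-bit refinement below)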
((a_mnt / x) >> 2) + (x >> 1); + + // r = 1 / x; // 32-bit accuracy + r = ((uint64_t)1 << 62) / x; + + // r = r * (2 - x * r); // 64-bit accuracy + mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r); + lsr128(&x0, &x1, x0, x1, 31); + + // x = (x + a * r) / 2; // 64-bit accuracy + mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2); + lsl128(&x0, &x1, x0, x1, 5); + lsr128(&x0, &x1, x0, x1, 56); + + x0 = ((uint64_t)x << 31) + (x0 >> 1); + + x_sgn = 0; + x_exp = (a_exp + 1053) >> 1; + x_mnt = x0; + x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1; + mul62x62(&x0, &x1, x_mnt, x_mnt); + lsl128(&x0, &x1, x0, x1, 19); + c = cmp128(x0, x1, 0, a_mnt); + if (c > 0) + --x_mnt; + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags); +} + +static int +modeConv(FPSCR fpscr) +{ + return (((int) fpscr) >> 22) & 0xF; +} + +static void +set_fpscr(FPSCR &fpscr, int flags) +{ + // translate back to FPSCR + bool underflow = false; + if (flags & FPLIB_IDC) { + fpscr.idc = 1; + } + if (flags & FPLIB_IOC) { + fpscr.ioc = 1; + } + if (flags & FPLIB_DZC) { + fpscr.dzc = 1; + } + if (flags & FPLIB_OFC) { + fpscr.ofc = 1; + } + if (flags & FPLIB_UFC) { + underflow = true; //xx Why is this required? + fpscr.ufc = 1; + } + if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) { + fpscr.ixc = 1; + } +} + +template <> +bool +fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +bool +fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr) +{ + int flags = 0; + int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags); + set_fpscr(fpscr, flags); + return x; +} + +template <> +uint32_t +fplibAbs(uint32_t op) +{ + return op & ~((uint32_t)1 << 31); +} + +template <> +uint64_t +fplibAbs(uint64_t op) +{ + return op & ~((uint64_t)1 << 63); +} + +template <> +uint32_t +fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +int +fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2, result; + uint32_t mnt1, mnt2; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) || + (exp2 == 255 && (uint32_t)(mnt2 << 9))) { + result = 3; + if ((exp1 == 255 && 
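+ // (the results are NZCV flag nibbles as FCMP delivers them:
+ // 8 is less than, 6 is equal, 2 is greater than, 3 is unordered)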
(uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) || + (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) || + signal_nans) + flags |= FPLIB_IOC; + } else { + if (op1 == op2 || (!mnt1 && !mnt2)) { + result = 6; + } else if (sgn1 != sgn2) { + result = sgn1 ? 8 : 2; + } else if (exp1 != exp2) { + result = sgn1 ^ (exp1 < exp2) ? 8 : 2; + } else { + result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +int +fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2, result; + uint64_t mnt1, mnt2; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) || + (exp2 == 2047 && (uint64_t)(mnt2 << 12))) { + result = 3; + if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) || + (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) || + signal_nans) + flags |= FPLIB_IOC; + } else { + if (op1 == op2 || (!mnt1 && !mnt2)) { + result = 6; + } else if (sgn1 != sgn2) { + result = sgn1 ? 8 : 2; + } else if (exp1 != exp2) { + result = sgn1 ^ (exp1 < exp2) ? 8 : 2; + } else { + result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2; + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +static uint16_t +fp16_FPConvertNaN_32(uint32_t op) +{ + return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13); +} + +static uint16_t +fp16_FPConvertNaN_64(uint64_t op) +{ + return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42); +} + +static uint32_t +fp32_FPConvertNaN_16(uint16_t op) +{ + return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13); +} + +static uint32_t +fp32_FPConvertNaN_64(uint64_t op) +{ + return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29); +} + +static uint64_t +fp64_FPConvertNaN_16(uint16_t op) +{ + return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42); +} + +static uint64_t +fp64_FPConvertNaN_32(uint32_t op) +{ + return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29); +} + +static uint32_t +fp32_FPOnePointFive(int sgn) +{ + return fp32_pack(sgn, 127, (uint64_t)1 << 22); +} + +static uint64_t +fp64_FPOnePointFive(int sgn) +{ + return fp64_pack(sgn, 1023, (uint64_t)1 << 51); +} + +static uint32_t +fp32_FPThree(int sgn) +{ + return fp32_pack(sgn, 128, (uint64_t)1 << 22); +} + +static uint64_t +fp64_FPThree(int sgn) +{ + return fp64_pack(sgn, 1024, (uint64_t)1 << 51); +} + +static uint32_t +fp32_FPTwo(int sgn) +{ + return fp32_pack(sgn, 128, 0); +} + +static uint64_t +fp64_FPTwo(int sgn) +{ + return fp64_pack(sgn, 1024, 0); +} + +template <> +uint16_t +fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint16_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + bool alt_hp = fpscr.ahp; + + if (exp == 255 && (uint32_t)(mnt << 9)) { + if (alt_hp) { + result = fp16_zero(sgn); + } else if (fpscr.dn) { + result = fp16_defaultNaN(); + } else { + result = fp16_FPConvertNaN_32(op); + } + if (!(mnt >> 22 & 1) || alt_hp) { + flags |= FPLIB_IOC; + } + } else if (exp == 255) { + if (alt_hp) { + result = sgn << 15 | (uint16_t)0x7fff; + flags |= FPLIB_IOC; + } else { + result = fp16_infinity(sgn); + } + } else if (!mnt) { + result = fp16_zero(sgn); + } else { + result = fp16_round_(sgn, exp - 127 + 15, + mnt >> 
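+ // (the seven discarded low mantissa bits are ORed into a sticky
+ // bit so the rounding step still sees any inexactness)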
7 | !!(uint32_t)(mnt << 25), + rounding, mode | alt_hp << 4, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint16_t +fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint16_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + bool alt_hp = fpscr.ahp; + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + if (alt_hp) { + result = fp16_zero(sgn); + } else if (fpscr.dn) { + result = fp16_defaultNaN(); + } else { + result = fp16_FPConvertNaN_64(op); + } + if (!(mnt >> 51 & 1) || alt_hp) { + flags |= FPLIB_IOC; + } + } else if (exp == 2047) { + if (alt_hp) { + result = sgn << 15 | (uint16_t)0x7fff; + flags |= FPLIB_IOC; + } else { + result = fp16_infinity(sgn); + } + } else if (!mnt) { + result = fp16_zero(sgn); + } else { + result = fp16_round_(sgn, exp - 1023 + 15, + mnt >> 36 | !!(uint64_t)(mnt << 28), + rounding, mode | alt_hp << 4, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint16_t mnt; + uint32_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { + if (fpscr.dn) { + result = fp32_defaultNaN(); + } else { + result = fp32_FPConvertNaN_16(op); + } + if (!(mnt >> 9 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 31 && !fpscr.ahp) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else { + mnt = fp16_normalise(mnt, &exp); + result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint32_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + if (fpscr.dn) { + result = fp32_defaultNaN(); + } else { + result = fp32_FPConvertNaN_64(op); + } + if (!(mnt >> 51 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 2047) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else { + result = fp32_round_(sgn, exp - 1023 + 127, + mnt >> 20 | !!(uint64_t)(mnt << 44), + rounding, mode, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint16_t mnt; + uint64_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) { + if (fpscr.dn) { + result = fp64_defaultNaN(); + } else { + result = fp64_FPConvertNaN_16(op); + } + if (!(mnt >> 9 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 31 && !fpscr.ahp) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else { + mnt = fp16_normalise(mnt, &exp); + result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t 
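+ // Widening conversions are exact, so, as in the half-precision
+ // cases above, no rounding step is needed: the mantissa is
+ // normalised and repacked with the exponent rebased from a bias of
+ // 127 to 1023.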
+fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint64_t result; + + // Unpack floating-point operand optionally with flush-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + if (fpscr.dn) { + result = fp64_defaultNaN(); + } else { + result = fp64_FPConvertNaN_32(op); + } + if (!(mnt >> 22 & 1)) { + flags |= FPLIB_IOC; + } + } else if (exp == 255) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else { + mnt = fp32_normalise(mnt, &exp); + result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +static uint32_t +fp32_repack(int sgn, int exp, uint32_t mnt) +{ + return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt); +} + +static uint64_t +fp64_repack(int sgn, int exp, uint64_t mnt) +{ + return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt); +} + +static void +fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn) +{ + // Treat a single quiet-NaN as +Infinity/-Infinity + if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23) + *op1 = fp32_infinity(sgn); + if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23) + *op2 = fp32_infinity(sgn); +} + +static void +fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn) +{ + // Treat a single quiet-NaN as +Infinity/-Infinity + if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52) + *op1 = fp64_infinity(sgn); + if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52) + *op2 = fp64_infinity(sgn); +} + +template <> +uint32_t +fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, x, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ? + fp32_repack(sgn1, exp1, mnt1) : + fp32_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, x, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? 
sgn2 : sgn1 ^ (op1 > op2)) ? + fp64_repack(sgn1, exp1, mnt1) : + fp64_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + fp32_minmaxnum(&op1, &op2, 1); + return fplibMax(op1, op2, fpscr); +} + +template <> +uint64_t +fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + fp64_minmaxnum(&op1, &op2, 1); + return fplibMax(op1, op2, fpscr); +} + +template <> +uint32_t +fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, x, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? + fp32_repack(sgn1, exp1, mnt1) : + fp32_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, x, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) { + result = x; + } else { + result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ? + fp64_repack(sgn1, exp1, mnt1) : + fp64_repack(sgn2, exp2, mnt2)); + } + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + fp32_minmaxnum(&op1, &op2, 0); + return fplibMin(op1, op2, fpscr); +} + +template <> +uint64_t +fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + fp64_minmaxnum(&op1, &op2, 0); + return fplibMin(op1, op2, fpscr); +} + +template <> +uint32_t +fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPTwo(sgn1 ^ sgn2); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else if (!mnt1 || !mnt2) { + result = fp32_zero(sgn1 ^ sgn2); + } else { + result = fp32_mul(op1, op2, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPTwo(sgn1 ^ sgn2); + } else if (exp1 == 2047 || exp2 
== 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else if (!mnt1 || !mnt2) { + result = fp64_zero(sgn1 ^ sgn2); + } else { + result = fp64_mul(op1, op2, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibNeg(uint32_t op) +{ + return op ^ (uint32_t)1 << 31; +} + +template <> +uint64_t +fplibNeg(uint64_t op) +{ + return op ^ (uint64_t)1 << 63; +} + +static const uint8_t recip_sqrt_estimate[256] = { + 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228, + 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203, + 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181, + 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163, + 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146, + 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, + 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118, + 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106, + 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86, + 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68, + 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53, + 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, + 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, + 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, + 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9, + 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 +}; + +template <> +uint32_t +fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (!mnt) { + result = fp32_infinity(sgn); + flags |= FPLIB_DZC; + } else if (sgn) { + result = fp32_defaultNaN(); + flags |= FPLIB_IOC; + } else if (exp == 255) { + result = fp32_zero(0); + } else { + exp += 8; + mnt = fp32_normalise(mnt, &exp); + mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)]; + result = fp32_pack(0, (380 - exp) >> 1, mnt << 15); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (!mnt) { + result = fp64_infinity(sgn); + flags |= FPLIB_DZC; + } else if (sgn) { + result = fp64_defaultNaN(); + flags |= FPLIB_IOC; + } else if (exp == 2047) { + result = fp64_zero(0); + } else { + exp += 11; + mnt = fp64_normalise(mnt, &exp); + mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)]; + result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + op1 = fplibNeg(op1); + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2)
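+ // (zero times infinity is special-cased to exactly 1.5, per the
+ // FRSQRTS definition, instead of raising Invalid Operation)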
|| (exp2 == 255 && !mnt1)) { + result = fp32_FPOnePointFive(0); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else { + result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + op1 = fplibNeg(op1); + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPOnePointFive(0); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else { + result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint32_t mnt1, mnt2, result; + + op1 = fplibNeg(op1); + fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp32_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) { + result = fp32_FPTwo(0); + } else if (exp1 == 255 || exp2 == 255) { + result = fp32_infinity(sgn1 ^ sgn2); + } else { + result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecipEstimate(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (exp == 255) { + result = fp32_zero(sgn); + } else if (!mnt) { + result = fp32_infinity(sgn); + flags |= FPLIB_DZC; + } else if (!((uint32_t)(op << 1) >> 22)) { + bool overflow_to_inf; + switch (FPCRRounding(fpscr)) { + case FPRounding_TIEEVEN: + overflow_to_inf = true; + break; + case FPRounding_POSINF: + overflow_to_inf = !sgn; + break; + case FPRounding_NEGINF: + overflow_to_inf = sgn; + break; + case FPRounding_ZERO: + overflow_to_inf = false; + break; + default: + assert(0); + } + result = overflow_to_inf ? 
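+ // (the reciprocal of an operand this close to zero is too large
+ // to represent, so saturate to infinity or to the largest normal
+ // depending on the rounding direction)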
fp32_infinity(sgn) : fp32_max_normal(sgn); + flags |= FPLIB_OFC | FPLIB_IXC; + } else if (fpscr.fz && exp >= 253) { + result = fp32_zero(sgn); + flags |= FPLIB_UFC; + } else { + exp += 8; + mnt = fp32_normalise(mnt, &exp); + int result_exp = 253 - exp; + uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1; + fraction <<= 15; + if (result_exp == 0) { + fraction >>= 1; + } else if (result_exp == -1) { + fraction >>= 2; + result_exp = 0; + } + result = fp32_pack(sgn, result_exp, fraction); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecipEstimate(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (exp == 2047) { + result = fp64_zero(sgn); + } else if (!mnt) { + result = fp64_infinity(sgn); + flags |= FPLIB_DZC; + } else if (!((uint64_t)(op << 1) >> 51)) { + bool overflow_to_inf; + switch (FPCRRounding(fpscr)) { + case FPRounding_TIEEVEN: + overflow_to_inf = true; + break; + case FPRounding_POSINF: + overflow_to_inf = !sgn; + break; + case FPRounding_NEGINF: + overflow_to_inf = sgn; + break; + case FPRounding_ZERO: + overflow_to_inf = false; + break; + default: + assert(0); + } + result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn); + flags |= FPLIB_OFC | FPLIB_IXC; + } else if (fpscr.fz && exp >= 2045) { + result = fp64_zero(sgn); + flags |= FPLIB_UFC; + } else { + exp += 11; + mnt = fp64_normalise(mnt, &exp); + int result_exp = 2045 - exp; + uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1; + fraction <<= 44; + if (result_exp == 0) { + fraction >>= 1; + } else if (result_exp == -1) { + fraction >>= 2; + result_exp = 0; + } + result = fp64_pack(sgn, result_exp, fraction); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn1, exp1, sgn2, exp2; + uint64_t mnt1, mnt2, result; + + op1 = fplibNeg(op1); + fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags); + fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags); + + result = fp64_process_NaNs(op1, op2, mode, &flags); + if (!result) { + if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) { + result = fp64_FPTwo(0); + } else if (exp1 == 2047 || exp2 == 2047) { + result = fp64_infinity(sgn1 ^ sgn2); + } else { + result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRecpX(uint32_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } + else { + if (!mnt) { // Zero and denormals + result = fp32_pack(sgn, 254, 0); + } else { // Infinities and normals + result = fp32_pack(sgn, exp ^ 255, 0); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRecpX(uint64_t op, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } + else { + if (!mnt) { // Zero and 
denormals + result = fp64_pack(sgn, 2046, 0); + } else { // Infinities and normals + result = fp64_pack(sgn, exp ^ 2047, 0); + } + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + // Handle NaNs, infinities and zeroes: + if (exp == 255 && (uint32_t)(mnt << 9)) { + result = fp32_process_NaN(op, mode, &flags); + } else if (exp == 255) { + result = fp32_infinity(sgn); + } else if (!mnt) { + result = fp32_zero(sgn); + } else if (exp >= 150) { + // There are no fractional bits + result = op; + } else { + // Truncate towards zero: + uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp); + int err = exp < 118 ? 1 : + (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0); + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (x == 0) { + result = fp32_zero(sgn); + } else { + exp = 150; + mnt = fp32_normalise(x, &exp); + result = fp32_pack(sgn, exp + 8, mnt >> 8); + } + + if (err && exact) + flags |= FPLIB_IXC; + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr) +{ + int mode = modeConv(fpscr); + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags); + + // Handle NaNs, infinities and zeroes: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + result = fp64_process_NaN(op, mode, &flags); + } else if (exp == 2047) { + result = fp64_infinity(sgn); + } else if (!mnt) { + result = fp64_zero(sgn); + } else if (exp >= 1075) { + // There are no fractional bits + result = op; + } else { + // Truncate towards zero: + uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp); + int err = exp < 1011 ? 
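+ // err is a two-bit guard/sticky code: bit 1 is the first
+ // discarded fraction bit and bit 0 ORs together all lower bits,
+ // so 2 means a fraction of exactly 0.5 and 3 means more than 0.5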
1 : + (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0); + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (x == 0) { + result = fp64_zero(sgn); + } else { + exp = 1075; + mnt = fp64_normalise(x, &exp); + result = fp64_pack(sgn, exp + 11, mnt >> 11); + } + + if (err && exact) + flags |= FPLIB_IXC; + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibSqrt(uint32_t op, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibSqrt(uint64_t op, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint32_t +fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +template <> +uint64_t +fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr) +{ + int flags = 0; + uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags); + set_fpscr0(fpscr, flags); + return result; +} + +static uint64_t +FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, + int *flags) +{ + uint64_t x; + int err; + + if (exp > 1023 + 63) { + *flags = FPLIB_IOC; + return ((uint64_t)!u << 63) - !sgn; + } + + x = lsr64(mnt << 11, 1023 + 63 - exp); + err = (exp > 1023 + 63 - 2 ? 0 : + (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) | + !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1))); + + switch (rounding) { + case FPRounding_TIEEVEN: + x += (err == 3 || (err == 2 && (x & 1))); + break; + case FPRounding_POSINF: + x += err && !sgn; + break; + case FPRounding_NEGINF: + x += err && sgn; + break; + case FPRounding_ZERO: + break; + case FPRounding_TIEAWAY: + x += err >> 1; + break; + default: + assert(0); + } + + if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) { + *flags = FPLIB_IOC; + return ((uint64_t)!u << 63) - !sgn; + } + + if (err) { + *flags = FPLIB_IXC; + } + + return sgn ? -x : x; +} + +static uint32_t +FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding, + int *flags) +{ + uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags); + if (u ? 
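+ // (saturate results outside the unsigned or signed 32-bit range
+ // and raise Invalid Operation, mirroring the 64-bit check above)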
x >= (uint64_t)1 << 32 : + !(x < (uint64_t)1 << 31 || + (uint64_t)-x <= (uint64_t)1 << 31)) { + *flags = FPLIB_IOC; + x = ((uint32_t)!u << 31) - !sgn; + } + return x; +} + +template <> +uint32_t +fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint32_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 255 && (uint32_t)(mnt << 9)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits, + (uint64_t)mnt << (52 - 23), u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint32_t +fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint64_t mnt; + uint32_t result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint32_t mnt; + uint64_t result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 255 && (uint32_t)(mnt << 9)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits, + (uint64_t)mnt << (52 - 23), u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +template <> +uint64_t +fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + int sgn, exp; + uint64_t mnt, result; + + // Unpack using FPCR to determine if subnormals are flushed-to-zero: + fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags); + + // If NaN, set cumulative flag or take exception: + if (exp == 2047 && (uint64_t)(mnt << 12)) { + flags = FPLIB_IOC; + result = 0; + } else { + result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags); + } + + set_fpscr0(fpscr, flags); + + return result; +} + +static uint32_t +fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) +{ + int x_sgn = !u && a >> 63; + int x_exp = 190 - fbits; + uint64_t x_mnt = x_sgn ? -a : a; + + // Handle zero: + if (!x_mnt) { + return fp32_zero(0); + } + + // Normalise and convert to 32 bits, collapsing error into bottom bit: + x_mnt = fp64_normalise(x_mnt, &x_exp); + x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1); + + return fp32_round(x_sgn, x_exp, x_mnt, mode, flags); +} + +static uint64_t +fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags) +{ + int x_sgn = !u && a >> 63; + int x_exp = 1024 + 62 - fbits; + uint64_t x_mnt = x_sgn ? 
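+    // (Editor's note; illustrative aside, not part of the original patch.)
+    // The single-precision fplibFPToFixed specializations above reuse the
+    // 64-bit helpers by widening the unpacked fp32 value to double's
+    // parameters first:
+    //
+    //     exp64 = exp32 + 1023 - 127;            // rebias 8-bit -> 11-bit
+    //     mnt64 = (uint64_t)mnt32 << (52 - 23);  // align the significand
+    //
+    // The widening is exact, so one pair of fixed-point helpers covers all
+    // four width combinations.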
-a : a; + + // Handle zero: + if (!x_mnt) { + return fp64_zero(0); + } + + x_mnt = fp64_normalise(x_mnt, &x_exp); + + return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags); +} + +template <> +uint32_t +fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + uint32_t res = fp32_cvtf(op, fbits, u, + (int)rounding | ((uint32_t)fpscr >> 22 & 12), + &flags); + set_fpscr0(fpscr, flags); + return res; +} + +template <> +uint64_t +fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr) +{ + int flags = 0; + uint64_t res = fp64_cvtf(op, fbits, u, + (int)rounding | ((uint32_t)fpscr >> 22 & 12), + &flags); + set_fpscr0(fpscr, flags); + return res; +} + +} diff --git a/src/arch/arm/insts/fplib.hh b/src/arch/arm/insts/fplib.hh new file mode 100644 index 000000000..6263687fc --- /dev/null +++ b/src/arch/arm/insts/fplib.hh @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Edmund Grimley Evans + * Thomas Grocutt + */ + +/** + * @file + * Floating-point library code, which will gradually replace vfp.hh. For + * portability, this library does not use floating-point data types. Currently, + * C's standard integer types are used in the API, though this could be changed + * to something like class Fp32 { uint32_t x; }, etc. 
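+ *
+ * (Editor's note; hypothetical usage sketch, not part of the original
+ * patch. The raw-bit values are invented; fplibAdd and FPCRRounding are
+ * declared below.)
+ *
+ *     FPSCR fpscr = 0;                    // RMode == 0: round to nearest
+ *     uint32_t a = 0x3f800000;            // 1.0f as raw bits
+ *     uint32_t b = 0x40000000;            // 2.0f as raw bits
+ *     uint32_t s = fplibAdd<uint32_t>(a, b, fpscr); // 0x40400000 (3.0f)
+ *     FPRounding r = FPCRRounding(fpscr); // FPRounding_TIEEVEN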
+ */ + +#ifndef __ARCH_ARM_INSTS_FPLIB_HH__ +#define __ARCH_ARM_INSTS_FPLIB_HH__ + +#include <stdint.h> + +#include "arch/arm/miscregs.hh" + +namespace ArmISA +{ + +enum FPRounding { + FPRounding_TIEEVEN = 0, + FPRounding_POSINF = 1, + FPRounding_NEGINF = 2, + FPRounding_ZERO = 3, + FPRounding_TIEAWAY = 4, + FPRounding_ODD = 5 +}; + +static inline FPRounding +FPCRRounding(FPSCR &fpscr) +{ + return (FPRounding)((uint32_t)fpscr >> 22 & 3); +} + +/** Floating-point absolute value. */ +template <class T> +T fplibAbs(T op); +/** Floating-point add. */ +template <class T> +T fplibAdd(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare (quiet and signaling). */ +template <class T> +int fplibCompare(T op1, T op2, bool signal_nans, FPSCR &fpscr); +/** Floating-point compare equal. */ +template <class T> +bool fplibCompareEQ(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare greater than or equal. */ +template <class T> +bool fplibCompareGE(T op1, T op2, FPSCR &fpscr); +/** Floating-point compare greater than. */ +template <class T> +bool fplibCompareGT(T op1, T op2, FPSCR &fpscr); +/** Floating-point convert precision. */ +template <class T1, class T2> +T2 fplibConvert(T1 op, FPRounding rounding, FPSCR &fpscr); +/** Floating-point division. */ +template <class T> +T fplibDiv(T op1, T op2, FPSCR &fpscr); +/** Floating-point maximum. */ +template <class T> +T fplibMax(T op1, T op2, FPSCR &fpscr); +/** Floating-point maximum number. */ +template <class T> +T fplibMaxNum(T op1, T op2, FPSCR &fpscr); +/** Floating-point minimum. */ +template <class T> +T fplibMin(T op1, T op2, FPSCR &fpscr); +/** Floating-point minimum number. */ +template <class T> +T fplibMinNum(T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply. */ +template <class T> +T fplibMul(T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply-add. */ +template <class T> +T fplibMulAdd(T addend, T op1, T op2, FPSCR &fpscr); +/** Floating-point multiply extended. */ +template <class T> +T fplibMulX(T op1, T op2, FPSCR &fpscr); +/** Floating-point negate. */ +template <class T> +T fplibNeg(T op); +/** Floating-point reciprocal square root estimate. */ +template <class T> +T fplibRSqrtEstimate(T op, FPSCR &fpscr); +/** Floating-point reciprocal square root step. */ +template <class T> +T fplibRSqrtStepFused(T op1, T op2, FPSCR &fpscr); +/** Floating-point reciprocal estimate. */ +template <class T> +T fplibRecipEstimate(T op, FPSCR &fpscr); +/** Floating-point reciprocal step. */ +template <class T> +T fplibRecipStepFused(T op1, T op2, FPSCR &fpscr); +/** Floating-point reciprocal exponent. */ +template <class T> +T fplibRecpX(T op, FPSCR &fpscr); +/** Floating-point convert to integer. */ +template <class T> +T fplibRoundInt(T op, FPRounding rounding, bool exact, FPSCR &fpscr); +/** Floating-point square root. */ +template <class T> +T fplibSqrt(T op, FPSCR &fpscr); +/** Floating-point subtract. */ +template <class T> +T fplibSub(T op1, T op2, FPSCR &fpscr); +/** Floating-point convert to fixed-point. */ +template <class T1, class T2> +T2 fplibFPToFixed(T1 op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr); +/** Floating-point convert from fixed-point. */ +template <class T> +T fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); + +/* Function specializations...
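+ *
+ * (Editor's note.) In the specializations below, the unsigned integer
+ * width selects the IEEE format carried as raw bits: uint16_t is half,
+ * uint32_t single and uint64_t double precision. A hypothetical
+ * narrowing call, assuming d holds a double's raw bits:
+ *
+ *     uint16_t h =
+ *         fplibConvert<uint64_t, uint16_t>(d, FPCRRounding(fpscr), fpscr);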
*/ +template <> +uint32_t fplibAbs(uint32_t op); +template <> +uint64_t fplibAbs(uint64_t op); +template <> +uint32_t fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +int fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr); +template <> +int fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr); +template <> +bool fplibCompareEQ(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareEQ(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGE(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGE(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGT(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +bool fplibCompareGT(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint16_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint64_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint64_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr); +template <> +uint32_t fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, + FPSCR &fpscr); +template <> +uint64_t fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, + FPSCR &fpscr); +template <> +uint32_t fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibNeg(uint32_t op); +template <> +uint64_t fplibNeg(uint64_t op); +template <> +uint32_t fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr); +template<> +uint64_t fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibRecipEstimate(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibRecipEstimate(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibRecpX(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibRecpX(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibRoundInt(uint32_t op, 
FPRounding rounding, bool exact, + FPSCR &fpscr); +template <> +uint64_t fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, + FPSCR &fpscr); +template <> +uint32_t fplibSqrt(uint32_t op, FPSCR &fpscr); +template <> +uint64_t fplibSqrt(uint64_t op, FPSCR &fpscr); +template <> +uint32_t fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr); +template <> +uint64_t fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr); +template <> +uint32_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint32_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint32_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +template <> +uint64_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, + FPSCR &fpscr); +} + +#endif diff --git a/src/arch/arm/insts/macromem.cc b/src/arch/arm/insts/macromem.cc index 26a916fc7..42cb98a7c 100644 --- a/src/arch/arm/insts/macromem.cc +++ b/src/arch/arm/insts/macromem.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -43,7 +43,9 @@ #include #include "arch/arm/insts/macromem.hh" + #include "arch/arm/generated/decoder.hh" +#include "arch/arm/insts/neon64_mem.hh" using namespace std; using namespace ArmISAInst; @@ -177,6 +179,212 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, } } +PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + uint32_t size, bool fp, bool load, bool noAlloc, + bool signExt, bool exclusive, bool acrel, + int64_t imm, AddrMode mode, + IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) : + PredMacroOp(mnem, machInst, __opClass) +{ + bool writeback = (mode != AddrMd_Offset); + numMicroops = 1 + (size / 4) + (writeback ? 1 : 0); + microOps = new StaticInstPtr[numMicroops]; + + StaticInstPtr *uop = microOps; + + bool post = (mode == AddrMd_PostIndex); + + rn = makeSP(rn); + + *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ? 
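+    // (Editor's note; illustrative aside, not part of the original patch.)
+    // Address generation is split from the accesses: the microop here
+    // leaves UREG0 = Rn + (post-indexed ? 0 : imm), every memory microop
+    // then addresses off UREG0 at a fixed offset, and the optional
+    // trailing writeback microop folds the increment back into Rn. Offset,
+    // pre-index and post-index modes therefore differ only at these two
+    // points.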
0 : imm); + + if (fp) { + if (size == 16) { + if (load) { + *++uop = new MicroLdrQBFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQTFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQBFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *++uop = new MicroLdrQTFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrQBFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrQTFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrQBFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *++uop = new MicroStrQTFpXImmUop(machInst, rt2, + INTREG_UREG0, 16, noAlloc, exclusive, acrel); + } + } else if (size == 8) { + if (load) { + *++uop = new MicroLdrFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrFpXImmUop(machInst, rt2, + INTREG_UREG0, 8, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrFpXImmUop(machInst, rt, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrFpXImmUop(machInst, rt2, + INTREG_UREG0, 8, noAlloc, exclusive, acrel); + } + } else if (size == 4) { + if (load) { + *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } + } else { + if (size == 8) { + if (load) { + *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0, + 0, noAlloc, exclusive, acrel); + *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0, + size, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0, + 0, noAlloc, exclusive, acrel); + *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0, + size, noAlloc, exclusive, acrel); + } + } else if (size == 4) { + if (load) { + if (signExt) { + *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } else { + *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } else { + *++uop = new MicroStrDXImmUop(machInst, rt, rt2, + INTREG_UREG0, 0, noAlloc, exclusive, acrel); + } + } + } + + if (writeback) { + *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0, + post ? 
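+        // (Editor's note; illustrative aside, not part of the original
+        // patch.) The QB/QT microop pairs used above move each 128-bit Q
+        // register as two 64-bit halves (Bottom, then Top), so no single
+        // memory microop carries more than 8 bytes of FP register state;
+        // a 128-bit pair access costs four memory microops plus the
+        // address and writeback arithmetic.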
imm : 0); + } + + (*uop)->setLastMicroop(); + + for (StaticInstPtr *curUop = microOps; + !(*curUop)->isLastMicroop(); curUop++) { + (*curUop)->setDelayedCommit(); + } +} + +BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); + } + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + +BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 3; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0); + } + microOps[2] = new MicroAddXiUop(machInst, base, base, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setDelayedCommit(); + microOps[2]->setLastMicroop(); +} + +BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 3; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); + } else { + microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); + microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); + } + microOps[2] = new MicroAddXiUop(machInst, base, base, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setDelayedCommit(); + microOps[2]->setLastMicroop(); +} + +BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, bool load, IntRegIndex dest, + IntRegIndex base, IntRegIndex offset, + ArmExtendType type, int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + if (load) { + microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base, + offset, type, imm); + microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base, + offset, type, imm); + } else { + microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base, + offset, type, imm); + microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base, + offset, type, imm); + } + + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + +BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst, + OpClass __opClass, IntRegIndex dest, + int64_t imm) : + PredMacroOp(mnem, machInst, __opClass) +{ + numMicroops = 2; + microOps = new StaticInstPtr[numMicroops]; + + microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm); + microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm); + + microOps[0]->setDelayedCommit(); + microOps[1]->setLastMicroop(); +} + VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, unsigned 
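+// (Editor's note; illustrative aside, not part of the original patch.)
+// BigFpMemPreOp and BigFpMemPostOp above share one three-microop shape and
+// differ only in where the immediate lands: pre-index applies it to the
+// QB/QT accesses themselves (base + imm) and post-index applies it to the
+// final writeback alone (accesses at base + 0). Both end with
+// base = base + imm.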
elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm) : @@ -193,7 +401,7 @@ VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, if (deinterleave) numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2; + RegIndex rMid = deinterleave ? NumFloatV7ArchRegs : vd * 2; uint32_t noAlign = TLB::MustBeOne; @@ -295,7 +503,7 @@ VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst, numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex ufp0 = NumFloatArchRegs; + RegIndex ufp0 = NumFloatV7ArchRegs; unsigned uopIdx = 0; switch (loadSize) { @@ -556,7 +764,7 @@ VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, uint32_t noAlign = TLB::MustBeOne; - RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2; + RegIndex rMid = interleave ? NumFloatV7ArchRegs : vd * 2; unsigned uopIdx = 0; if (interleave) { @@ -657,7 +865,7 @@ VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, numMicroops += (regs / elems); microOps = new StaticInstPtr[numMicroops]; - RegIndex ufp0 = NumFloatArchRegs; + RegIndex ufp0 = NumFloatV7ArchRegs; unsigned uopIdx = 0; switch (elems) { @@ -834,6 +1042,285 @@ VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst, microOps[numMicroops - 1]->setLastMicroop(); } +VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int totNumBytes = numRegs * dataSize / 8; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0); + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 
8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + for (int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroDeintNeon64( + machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, + numStructElems, numRegs, i /* step */); + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; ++i) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t numRegs, bool wb) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int totNumBytes = numRegs * dataSize / 8; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroIntNeon64( + machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, + numStructElems, numRegs, i /* step */); + } + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t index, bool wb, + bool replicate) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 
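+    // (Editor's note; illustrative aside, not part of the original patch.)
+    // All four AArch64 NEON macroop constructors here budget microops the
+    // same way: ceil(totNumBytes / 16) memory microops (the 16-byte cap
+    // noted in the comments), plus the marshalling microops, plus an
+    // optional writeback. E.g. a four-register 128-bit VldMultOp64 with
+    // writeback is 64 bytes: 4 loads + 2 deinterleaves + 1 writeback = 7
+    // microops.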
1 : 0; + + int eSizeBytes = 1 << eSize; + int totNumBytes = numStructElems * eSizeBytes; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0); + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for (; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accSize */, eSize); + } + microOps[uopIdx++] = new MicroNeonLoad64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroUnpackNeon64( + machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize, + numStructElems, index, i /* step */, replicate); + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + +VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex rn, RegIndex vd, + RegIndex rm, uint8_t eSize, uint8_t dataSize, + uint8_t numStructElems, uint8_t index, bool wb, + bool replicate) : + PredMacroOp(mnem, machInst, __opClass) +{ + RegIndex vx = NumFloatV8ArchRegs / 4; + RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn); + bool baseIsSP = isSP((IntRegIndex) rnsp); + + numMicroops = wb ? 1 : 0; + + int eSizeBytes = 1 << eSize; + int totNumBytes = numStructElems * eSizeBytes; + assert(totNumBytes <= 64); + + // The guiding principle here is that no more than 16 bytes can be + // transferred at a time + int numMemMicroops = totNumBytes / 16; + int residuum = totNumBytes % 16; + if (residuum) + ++numMemMicroops; + numMicroops += numMemMicroops; + + int numMarshalMicroops = totNumBytes > 32 ? 2 : 1; + numMicroops += numMarshalMicroops; + + microOps = new StaticInstPtr[numMicroops]; + unsigned uopIdx = 0; + + for(int i = 0; i < numMarshalMicroops; ++i) { + microOps[uopIdx++] = new MicroPackNeon64( + machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize, + numStructElems, index, i /* step */, replicate); + } + + uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize | + TLB::AllowUnaligned; + + int i = 0; + for(; i < numMemMicroops - 1; ++i) { + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, + baseIsSP, 16 /* accsize */, eSize); + } + microOps[uopIdx++] = new MicroNeonStore64( + machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP, + residuum ? 
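+    // (Editor's note; illustrative aside, not part of the original patch.)
+    // Note the mirrored ordering: the load variants issue the memory
+    // microops first and then deinterleave/unpack into the architectural
+    // registers, while the store variants pack/interleave into the
+    // transfer registers at vx first and only then issue the stores.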
residuum : 16 /* accSize */, eSize); + + // Writeback microop: the post-increment amount is encoded in "Rm": a + // 64-bit general register OR as '11111' for an immediate value equal to + // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64) + if (wb) { + if (rm != ((RegIndex) INTREG_X31)) { + microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm, + UXTX, 0); + } else { + microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp, + totNumBytes); + } + } + + assert(uopIdx == numMicroops); + + for (int i = 0; i < numMicroops - 1; i++) { + microOps[i]->setDelayedCommit(); + } + microOps[numMicroops - 1]->setLastMicroop(); +} + MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, IntRegIndex rn, RegIndex vd, bool single, bool up, @@ -846,14 +1333,14 @@ MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst, // to be functionally identical except that fldmx is deprecated. For now // we'll assume they're otherwise interchangable. int count = (single ? offset : (offset / 2)); - if (count == 0 || count > NumFloatArchRegs) + if (count == 0 || count > NumFloatV7ArchRegs) warn_once("Bad offset field for VFP load/store multiple.\n"); if (count == 0) { // Force there to be at least one microop so the macroop makes sense. writeback = true; } - if (count > NumFloatArchRegs) - count = NumFloatArchRegs; + if (count > NumFloatV7ArchRegs) + count = NumFloatV7ArchRegs; numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0); microOps = new StaticInstPtr[numMicroops]; @@ -933,6 +1420,19 @@ MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const return ss.str(); } +std::string +MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, ura); + ss << ", "; + printReg(ss, urb); + ss << ", "; + ccprintf(ss, "#%d", imm); + return ss.str(); +} + std::string MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const { @@ -942,6 +1442,18 @@ MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const return ss.str(); } +std::string +MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, ura); + ccprintf(ss, ", "); + printReg(ss, urb); + printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt); + return ss.str(); +} + std::string MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const { diff --git a/src/arch/arm/insts/macromem.hh b/src/arch/arm/insts/macromem.hh index 4933a1e7c..fc8e3e1b7 100644 --- a/src/arch/arm/insts/macromem.hh +++ b/src/arch/arm/insts/macromem.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -85,6 +85,27 @@ class MicroOp : public PredOp } }; +class MicroOpX : public ArmStaticInst +{ + protected: + MicroOpX(const char *mnem, ExtMachInst machInst, OpClass __opClass) + : ArmStaticInst(mnem, machInst, __opClass) + {} + + public: + void + advancePC(PCState &pcState) const + { + if (flags[IsLastMicroop]) { + pcState.uEnd(); + } else if (flags[IsMicroop]) { + pcState.uAdvance(); + } else { + pcState.advance(); + } + } +}; + /** * Microops for Neon loads/stores */ @@ -135,6 +156,96 @@ class MicroNeonMixLaneOp : public MicroNeonMixOp } }; +/** + * Microops for AArch64 NEON load/store (de)interleaving + */ +class MicroNeonMixOp64 : 
public MicroOp +{ + protected: + RegIndex dest, op1; + uint8_t eSize, dataSize, numStructElems, numRegs, step; + + MicroNeonMixOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _dest, RegIndex _op1, uint8_t _eSize, + uint8_t _dataSize, uint8_t _numStructElems, + uint8_t _numRegs, uint8_t _step) + : MicroOp(mnem, machInst, __opClass), dest(_dest), op1(_op1), + eSize(_eSize), dataSize(_dataSize), numStructElems(_numStructElems), + numRegs(_numRegs), step(_step) + { + } +}; + +class MicroNeonMixLaneOp64 : public MicroOp +{ + protected: + RegIndex dest, op1; + uint8_t eSize, dataSize, numStructElems, lane, step; + bool replicate; + + MicroNeonMixLaneOp64(const char *mnem, ExtMachInst machInst, + OpClass __opClass, RegIndex _dest, RegIndex _op1, + uint8_t _eSize, uint8_t _dataSize, + uint8_t _numStructElems, uint8_t _lane, uint8_t _step, + bool _replicate = false) + : MicroOp(mnem, machInst, __opClass), dest(_dest), op1(_op1), + eSize(_eSize), dataSize(_dataSize), numStructElems(_numStructElems), + lane(_lane), step(_step), replicate(_replicate) + { + } +}; + +/** + * Base classes for microcoded AArch64 NEON memory instructions. + */ +class VldMultOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, numRegs; + bool wb; + + VldMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, + bool wb); +}; + +class VstMultOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, numRegs; + bool wb; + + VstMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, + bool wb); +}; + +class VldSingleOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, index; + bool wb, replicate; + + VldSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t index, + bool wb, bool replicate = false); +}; + +class VstSingleOp64 : public PredMacroOp +{ + protected: + uint8_t eSize, dataSize, numStructElems, index; + bool wb, replicate; + + VstSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, + uint8_t dataSize, uint8_t numStructElems, uint8_t index, + bool wb, bool replicate = false); +}; + /** * Microops of the form * PC = IntRegA @@ -180,10 +291,10 @@ class MicroIntImmOp : public MicroOp { protected: RegIndex ura, urb; - uint32_t imm; + int32_t imm; MicroIntImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, - RegIndex _ura, RegIndex _urb, uint32_t _imm) + RegIndex _ura, RegIndex _urb, int32_t _imm) : MicroOp(mnem, machInst, __opClass), ura(_ura), urb(_urb), imm(_imm) { @@ -192,6 +303,22 @@ class MicroIntImmOp : public MicroOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MicroIntImmXOp : public MicroOpX +{ + protected: + RegIndex ura, urb; + int64_t imm; + + MicroIntImmXOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _ura, RegIndex _urb, int64_t _imm) + : MicroOpX(mnem, machInst, __opClass), + ura(_ura), urb(_urb), imm(_imm) + { + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + /** * Microops of the form IntRegA = IntRegB op IntRegC */ @@ -210,6 
+337,25 @@ class MicroIntOp : public MicroOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MicroIntRegXOp : public MicroOp +{ + protected: + RegIndex ura, urb, urc; + ArmExtendType type; + uint32_t shiftAmt; + + MicroIntRegXOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _ura, RegIndex _urb, RegIndex _urc, + ArmExtendType _type, uint32_t _shiftAmt) + : MicroOp(mnem, machInst, __opClass), + ura(_ura), urb(_urb), urc(_urc), + type(_type), shiftAmt(_shiftAmt) + { + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + /** * Microops of the form IntRegA = IntRegB op shifted IntRegC */ @@ -260,6 +406,61 @@ class MacroMemOp : public PredMacroOp bool writeback, bool load, uint32_t reglist); }; +/** + * Base class for pair load/store instructions. + */ +class PairMemOp : public PredMacroOp +{ + public: + enum AddrMode { + AddrMd_Offset, + AddrMd_PreIndex, + AddrMd_PostIndex + }; + + protected: + PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, + bool exclusive, bool acrel, int64_t imm, AddrMode mode, + IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2); +}; + +class BigFpMemImmOp : public PredMacroOp +{ + protected: + BigFpMemImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); +}; + +class BigFpMemPostOp : public PredMacroOp +{ + protected: + BigFpMemPostOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); +}; + +class BigFpMemPreOp : public PredMacroOp +{ + protected: + BigFpMemPreOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, int64_t imm); +}; + +class BigFpMemRegOp : public PredMacroOp +{ + protected: + BigFpMemRegOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + bool load, IntRegIndex dest, IntRegIndex base, + IntRegIndex offset, ArmExtendType type, int64_t imm); +}; + +class BigFpMemLitOp : public PredMacroOp +{ + protected: + BigFpMemLitOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + IntRegIndex dest, int64_t imm); +}; + /** * Base classes for microcoded integer memory instructions. */ diff --git a/src/arch/arm/insts/mem.cc b/src/arch/arm/insts/mem.cc index 552803b6a..15702ff83 100644 --- a/src/arch/arm/insts/mem.cc +++ b/src/arch/arm/insts/mem.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -157,6 +157,9 @@ SrsOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const case MODE_ABORT: ss << "abort"; break; + case MODE_HYP: + ss << "hyp"; + break; case MODE_UNDEFINED: ss << "undefined"; break; diff --git a/src/arch/arm/insts/mem64.cc b/src/arch/arm/insts/mem64.cc new file mode 100644 index 000000000..4d1fdd302 --- /dev/null +++ b/src/arch/arm/insts/mem64.cc @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/mem64.hh" +#include "arch/arm/tlb.hh" +#include "base/loader/symtab.hh" +#include "mem/request.hh" + +using namespace std; + +namespace ArmISA +{ + +std::string +SysDC64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + ccprintf(ss, ", ["); + printReg(ss, base); + ccprintf(ss, "]"); + return ss.str(); +} + + + +void +Memory64::startDisassembly(std::ostream &os) const +{ + printMnemonic(os, "", false); + printReg(os, dest); + ccprintf(os, ", ["); + printReg(os, base); +} + +void +Memory64::setExcAcRel(bool exclusive, bool acrel) +{ + if (exclusive) + memAccessFlags |= Request::LLSC; + else + memAccessFlags |= ArmISA::TLB::AllowUnaligned; + if (acrel) { + flags[IsMemBarrier] = true; + flags[IsWriteBarrier] = true; + flags[IsReadBarrier] = true; + } +} + +std::string +MemoryImm64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + if (imm) + ccprintf(ss, ", #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryDImm64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, dest2); + ccprintf(ss, ", ["); + printReg(ss, base); + if (imm) + ccprintf(ss, ", #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryDImmEx64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, result); + ccprintf(ss, ", "); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, dest2); + ccprintf(ss, ", ["); + printReg(ss, base); + if (imm) + 
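+    // (Editor's note; illustrative aside, not part of the original patch.)
+    // Memory64::setExcAcRel above maps decode bits onto flags: exclusive
+    // accesses gain Request::LLSC and stay alignment-checked, all others
+    // are tagged TLB::AllowUnaligned, and acquire/release accesses set
+    // the memory, read and write barrier flags so CPU models order them
+    // conservatively.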
ccprintf(ss, ", #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryPreIndex64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + ccprintf(ss, ", #%d]!", imm); + return ss.str(); +} + +std::string +MemoryPostIndex64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + if (imm) + ccprintf(ss, "], #%d", imm); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryReg64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + printExtendOperand(false, ss, offset, type, shiftAmt); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryRaw64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + startDisassembly(ss); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryEx64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, result); + ccprintf(ss, ", ["); + printReg(ss, base); + ccprintf(ss, "]"); + return ss.str(); +} + +std::string +MemoryLiteral64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", #%d", pc + imm); + return ss.str(); +} +} diff --git a/src/arch/arm/insts/mem64.hh b/src/arch/arm/insts/mem64.hh new file mode 100644 index 000000000..21c1e1ea8 --- /dev/null +++ b/src/arch/arm/insts/mem64.hh @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ +#ifndef __ARCH_ARM_MEM64_HH__ +#define __ARCH_ARM_MEM64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +namespace ArmISA +{ + +class SysDC64 : public ArmStaticInst +{ + protected: + IntRegIndex base; + IntRegIndex dest; + uint64_t imm; + + SysDC64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _base, IntRegIndex _dest, uint64_t _imm) + : ArmStaticInst(mnem, _machInst, __opClass), base(_base), dest(_dest), + imm(_imm) + {} + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MightBeMicro64 : public ArmStaticInst +{ + protected: + MightBeMicro64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) + : ArmStaticInst(mnem, _machInst, __opClass) + {} + + void + advancePC(PCState &pcState) const + { + if (flags[IsLastMicroop]) { + pcState.uEnd(); + } else if (flags[IsMicroop]) { + pcState.uAdvance(); + } else { + pcState.advance(); + } + } +}; + +class Memory64 : public MightBeMicro64 +{ + public: + enum AddrMode { + AddrMd_Offset, + AddrMd_PreIndex, + AddrMd_PostIndex + }; + + protected: + + IntRegIndex dest; + IntRegIndex base; + /// True if the base register is SP (used for SP alignment checking). + bool baseIsSP; + static const unsigned numMicroops = 3; + + StaticInstPtr *uops; + + Memory64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _base) + : MightBeMicro64(mnem, _machInst, __opClass), + dest(_dest), base(_base), uops(NULL) + { + baseIsSP = isSP(_base); + } + + virtual + ~Memory64() + { + delete [] uops; + } + + StaticInstPtr + fetchMicroop(MicroPC microPC) const + { + assert(uops != NULL && microPC < numMicroops); + return uops[microPC]; + } + + void startDisassembly(std::ostream &os) const; + + unsigned memAccessFlags; + + void setExcAcRel(bool exclusive, bool acrel); +}; + +class MemoryImm64 : public Memory64 +{ + protected: + int64_t imm; + + MemoryImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _base, int64_t _imm) + : Memory64(mnem, _machInst, __opClass, _dest, _base), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryDImm64 : public MemoryImm64 +{ + protected: + IntRegIndex dest2; + + MemoryDImm64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _dest2, IntRegIndex _base, + int64_t _imm) + : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm), + dest2(_dest2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryDImmEx64 : public MemoryDImm64 +{ + protected: + IntRegIndex result; + + MemoryDImmEx64(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _result, IntRegIndex _dest, IntRegIndex _dest2, + IntRegIndex _base, int32_t _imm) + : MemoryDImm64(mnem, _machInst, __opClass, _dest, _dest2, + _base, _imm), result(_result) + {} + + std::string 
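+    // (Editor's note; illustrative aside, not part of the original patch.)
+    // MightBeMicro64::advancePC above lets one static instruction serve
+    // standalone or as a macroop slot: uEnd() finishes the last microop
+    // and advances the instruction PC, uAdvance() steps only the micro
+    // PC, and plain advance() covers the non-microcoded case.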
generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryPreIndex64 : public MemoryImm64 +{ + protected: + MemoryPreIndex64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + int64_t _imm) + : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryPostIndex64 : public MemoryImm64 +{ + protected: + MemoryPostIndex64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + int64_t _imm) + : MemoryImm64(mnem, _machInst, __opClass, _dest, _base, _imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryReg64 : public Memory64 +{ + protected: + IntRegIndex offset; + ArmExtendType type; + uint64_t shiftAmt; + + MemoryReg64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + IntRegIndex _offset, ArmExtendType _type, + uint64_t _shiftAmt) + : Memory64(mnem, _machInst, __opClass, _dest, _base), + offset(_offset), type(_type), shiftAmt(_shiftAmt) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryRaw64 : public Memory64 +{ + protected: + MemoryRaw64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base) + : Memory64(mnem, _machInst, __opClass, _dest, _base) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryEx64 : public Memory64 +{ + protected: + IntRegIndex result; + + MemoryEx64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _base, + IntRegIndex _result) + : Memory64(mnem, _machInst, __opClass, _dest, _base), result(_result) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class MemoryLiteral64 : public Memory64 +{ + protected: + int64_t imm; + + MemoryLiteral64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, int64_t _imm) + : Memory64(mnem, _machInst, __opClass, _dest, INTREG_ZERO), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; +} + +#endif //__ARCH_ARM_INSTS_MEM_HH__ diff --git a/src/arch/arm/insts/misc.cc b/src/arch/arm/insts/misc.cc index 6320bb6da..efc334c4b 100644 --- a/src/arch/arm/insts/misc.cc +++ b/src/arch/arm/insts/misc.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -145,6 +145,32 @@ MsrRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const return ss.str(); } +std::string +MrrcOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ss << ", "; + printReg(ss, dest2); + ss << ", "; + printReg(ss, op1); + return ss.str(); +} + +std::string +McrrOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ss << ", "; + printReg(ss, op1); + ss << ", "; + printReg(ss, op2); + return ss.str(); +} + std::string ImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { @@ -229,6 +255,16 @@ RegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const return ss.str(); } +std::string +RegImmImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + std::string RegRegImmImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { diff --git a/src/arch/arm/insts/misc.hh b/src/arch/arm/insts/misc.hh index c9e114f85..3d947a272 100644 --- a/src/arch/arm/insts/misc.hh +++ b/src/arch/arm/insts/misc.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -94,6 +94,42 @@ class MsrRegOp : public MsrBase std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MrrcOp : public PredOp +{ + protected: + IntRegIndex op1; + IntRegIndex dest; + IntRegIndex dest2; + uint32_t imm; + + MrrcOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1, IntRegIndex _dest, IntRegIndex _dest2, + uint32_t _imm) : + PredOp(mnem, _machInst, __opClass), op1(_op1), dest(_dest), + dest2(_dest2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class McrrOp : public PredOp +{ + protected: + IntRegIndex op1; + IntRegIndex op2; + IntRegIndex dest; + uint32_t imm; + + McrrOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _op1, IntRegIndex _op2, IntRegIndex _dest, + uint32_t _imm) : + PredOp(mnem, _machInst, __opClass), op1(_op1), op2(_op2), + dest(_dest), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class ImmOp : public PredOp { protected: @@ -220,6 +256,23 @@ class RegRegImmOp : public PredOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class RegImmImmOp : public PredOp +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + uint64_t imm1; + uint64_t imm2; + + RegImmImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, uint64_t _imm1, uint64_t _imm2) : + PredOp(mnem, _machInst, __opClass), + dest(_dest), imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class RegRegImmImmOp : public PredOp { protected: diff --git a/src/arch/arm/insts/misc64.cc b/src/arch/arm/insts/misc64.cc new file mode 100644 index 000000000..3553020da --- /dev/null +++ b/src/arch/arm/insts/misc64.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a 
license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#include "arch/arm/insts/misc64.hh" + +std::string +RegRegImmImmOp64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ss << ", "; + printReg(ss, op1); + ccprintf(ss, ", #%d, #%d", imm1, imm2); + return ss.str(); +} + +std::string +RegRegRegImmOp64::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ss << ", "; + printReg(ss, op1); + ss << ", "; + printReg(ss, op2); + ccprintf(ss, ", #%d", imm); + return ss.str(); +} + +std::string +UnknownOp64::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + return csprintf("%-10s (inst %#08x)", "unknown", machInst); +} diff --git a/src/arch/arm/insts/misc64.hh b/src/arch/arm/insts/misc64.hh new file mode 100644 index 000000000..5a0e18224 --- /dev/null +++ b/src/arch/arm/insts/misc64.hh @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2011-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Gabe Black + */ + +#ifndef __ARCH_ARM_INSTS_MISC64_HH__ +#define __ARCH_ARM_INSTS_MISC64_HH__ + +#include "arch/arm/insts/static_inst.hh" + +class RegRegImmImmOp64 : public ArmStaticInst +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + uint64_t imm1; + uint64_t imm2; + + RegRegImmImmOp64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + uint64_t _imm1, uint64_t _imm2) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), imm1(_imm1), imm2(_imm2) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class RegRegRegImmOp64 : public ArmStaticInst +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + uint64_t imm; + + RegRegRegImmOp64(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _dest, IntRegIndex _op1, + IntRegIndex _op2, uint64_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), imm(_imm) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class UnknownOp64 : public ArmStaticInst +{ + protected: + + UnknownOp64(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : + ArmStaticInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +#endif diff --git a/src/arch/arm/insts/neon64_mem.hh b/src/arch/arm/insts/neon64_mem.hh new file mode 100644 index 000000000..01ce1b624 --- /dev/null +++ b/src/arch/arm/insts/neon64_mem.hh @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2012-2013 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other 
intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Mbou Eyole + * Giacomo Gabrielli + */ + +/// @file +/// Utility functions and datatypes used by AArch64 NEON memory instructions. + +#ifndef __ARCH_ARM_INSTS_NEON64_MEM_HH__ +#define __ARCH_ARM_INSTS_NEON64_MEM_HH__ + +namespace ArmISA +{ + +typedef uint64_t XReg; + +/// 128-bit NEON vector register. +struct VReg { + XReg hi; + XReg lo; +}; + +/// Write a single NEON vector element leaving the others untouched. +inline void +writeVecElem(VReg *dest, XReg src, int index, int eSize) +{ + // eSize must be less than 4: + // 0 -> 8-bit elems, + // 1 -> 16-bit elems, + // 2 -> 32-bit elems, + // 3 -> 64-bit elems + assert(eSize <= 3); + + int eBits = 8 << eSize; + int lsbPos = index * eBits; + assert(lsbPos < 128); + int shiftAmt = lsbPos % 64; + + XReg maskBits = -1; + if (eBits == 64) { + maskBits = 0; + } else { + maskBits = maskBits << eBits; + } + maskBits = ~maskBits; + + XReg sMask = maskBits; + maskBits = sMask << shiftAmt; + + if (lsbPos < 64) { + dest->lo = (dest->lo & (~maskBits)) | ((src & sMask) << shiftAmt); + } else { + dest->hi = (dest->hi & (~maskBits)) | ((src & sMask) << shiftAmt); + } +} + +/// Read a single NEON vector element. 
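+/// For example, with eSize == 1 (16-bit elements) and index == 5, the element occupies bits [95:80] of the 128-bit register, i.e. bits [31:16] of the hi word, so readVecElem(v, 5, 1) below returns those 16 bits zero-extended to 64 bits.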
+inline XReg +readVecElem(VReg src, int index, int eSize) +{ + // eSize must be less than 4: + // 0 -> 8-bit elems, + // 1 -> 16-bit elems, + // 2 -> 32-bit elems, + // 3 -> 64-bit elems + assert(eSize <= 3); + + XReg data; + + int eBits = 8 << eSize; + int lsbPos = index * eBits; + assert(lsbPos < 128); + int shiftAmt = lsbPos % 64; + + XReg maskBits = -1; + if (eBits == 64) { + maskBits = 0; + } else { + maskBits = maskBits << eBits; + } + maskBits = ~maskBits; + + if (lsbPos < 64) { + data = (src.lo >> shiftAmt) & maskBits; + } else { + data = (src.hi >> shiftAmt) & maskBits; + } + return data; +} + +} // namespace ArmISA + +#endif // __ARCH_ARM_INSTS_NEON64_MEM_HH__ diff --git a/src/arch/arm/insts/pred_inst.hh b/src/arch/arm/insts/pred_inst.hh index c441d1f32..c5e2ab386 100644 --- a/src/arch/arm/insts/pred_inst.hh +++ b/src/arch/arm/insts/pred_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -78,7 +78,8 @@ modified_imm(uint8_t ctrlImm, uint8_t dataImm) } static inline uint64_t -simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid) +simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid, + bool isAarch64 = false) { uint64_t bigData = data; immValid = true; @@ -133,12 +134,20 @@ simd_modified_imm(bool op, uint8_t cmode, uint8_t data, bool &immValid) } break; case 0xf: - if (!op) { - uint64_t bVal = bits(bigData, 6) ? (0x1F) : (0x20); - bigData = (bits(bigData, 5, 0) << 19) | - (bVal << 25) | (bits(bigData, 7) << 31); - bigData |= (bigData << 32); - break; + { + uint64_t bVal = 0; + if (!op) { + bVal = bits(bigData, 6) ? (0x1F) : (0x20); + bigData = (bits(bigData, 5, 0) << 19) | + (bVal << 25) | (bits(bigData, 7) << 31); + bigData |= (bigData << 32); + break; + } else if (isAarch64) { + bVal = bits(bigData, 6) ? (0x0FF) : (0x100); + bigData = (bits(bigData, 5, 0) << 48) | + (bVal << 54) | (bits(bigData, 7) << 63); + break; + } } // Fall through, immediate encoding is invalid. default: @@ -179,11 +188,14 @@ class PredOp : public ArmStaticInst /// Constructor PredOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) : - ArmStaticInst(mnem, _machInst, __opClass), - condCode(machInst.itstateMask ? - (ConditionCode)(uint8_t)machInst.itstateCond : - (ConditionCode)(unsigned)machInst.condCode) + ArmStaticInst(mnem, _machInst, __opClass) { + if (machInst.aarch64) + condCode = COND_UC; + else if (machInst.itstateMask) + condCode = (ConditionCode)(uint8_t)machInst.itstateCond; + else + condCode = (ConditionCode)(unsigned)machInst.condCode; } }; diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc index 2a8dee162..260c29a84 100644 --- a/src/arch/arm/insts/static_inst.cc +++ b/src/arch/arm/insts/static_inst.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -86,6 +86,90 @@ ArmStaticInst::shift_rm_imm(uint32_t base, uint32_t shamt, return 0; } +int64_t +ArmStaticInst::shiftReg64(uint64_t base, uint64_t shiftAmt, + ArmShiftType type, uint8_t width) const +{ + shiftAmt = shiftAmt % width; + ArmShiftType shiftType; + shiftType = (ArmShiftType)type; + + switch (shiftType) + { + case LSL: + return base << shiftAmt; + case LSR: + if (shiftAmt == 0) + return base; + else + return (base & mask(width)) >> shiftAmt; + case ASR: + if (shiftAmt == 0) { + return base; + } else { + int sign_bit = bits(base, intWidth - 1); + base >>= shiftAmt; + base = sign_bit ? (base | ~mask(intWidth - shiftAmt)) : base; + return base & mask(intWidth); + } + case ROR: + if (shiftAmt == 0) + return base; + else + return (base << (width - shiftAmt)) | (base >> shiftAmt); + default: + ccprintf(std::cerr, "Unhandled shift type\n"); + exit(1); + break; + } + return 0; +} + +int64_t +ArmStaticInst::extendReg64(uint64_t base, ArmExtendType type, + uint64_t shiftAmt, uint8_t width) const +{ + bool sign_extend = false; + int len = 0; + switch (type) { + case UXTB: + len = 8; + break; + case UXTH: + len = 16; + break; + case UXTW: + len = 32; + break; + case UXTX: + len = 64; + break; + case SXTB: + len = 8; + sign_extend = true; + break; + case SXTH: + len = 16; + sign_extend = true; + break; + case SXTW: + len = 32; + sign_extend = true; + break; + case SXTX: + len = 64; + sign_extend = true; + break; + } + len = len <= width - shiftAmt ? len : width - shiftAmt; + uint64_t tmp = (uint64_t) bits(base, len - 1, 0) << shiftAmt; + if (sign_extend) { + int sign_bit = bits(tmp, len + shiftAmt - 1); + tmp = sign_bit ? (tmp | ~mask(len + shiftAmt)) : tmp; + } + return tmp & mask(width); +} + // Shift Rm by Rs int32_t ArmStaticInst::shift_rm_rs(uint32_t base, uint32_t shamt, @@ -214,22 +298,33 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const switch (regIdxToClass(reg, &rel_reg)) { case IntRegClass: - switch (rel_reg) { - case PCReg: - ccprintf(os, "pc"); - break; - case StackPointerReg: - ccprintf(os, "sp"); - break; - case FramePointerReg: - ccprintf(os, "fp"); - break; - case ReturnAddressReg: - ccprintf(os, "lr"); - break; - default: - ccprintf(os, "r%d", reg); - break; + if (aarch64) { + if (reg == INTREG_UREG0) + ccprintf(os, "ureg0"); + else if (reg == INTREG_SPX) + ccprintf(os, "%s%s", (intWidth == 32) ? "w" : "", "sp"); + else if (reg == INTREG_X31) + ccprintf(os, "%szr", (intWidth == 32) ? "w" : "x"); + else + ccprintf(os, "%s%d", (intWidth == 32) ? 
"w" : "x", reg); + } else { + switch (rel_reg) { + case PCReg: + ccprintf(os, "pc"); + break; + case StackPointerReg: + ccprintf(os, "sp"); + break; + case FramePointerReg: + ccprintf(os, "fp"); + break; + case ReturnAddressReg: + ccprintf(os, "lr"); + break; + default: + ccprintf(os, "r%d", reg); + break; + } } break; case FloatRegClass: @@ -247,67 +342,102 @@ ArmStaticInst::printReg(std::ostream &os, int reg) const void ArmStaticInst::printMnemonic(std::ostream &os, const std::string &suffix, - bool withPred) const + bool withPred, + bool withCond64, + ConditionCode cond64) const { os << " " << mnemonic; - if (withPred) { - unsigned condCode = machInst.condCode; - switch (condCode) { - case COND_EQ: - os << "eq"; - break; - case COND_NE: - os << "ne"; - break; - case COND_CS: - os << "cs"; - break; - case COND_CC: - os << "cc"; - break; - case COND_MI: - os << "mi"; - break; - case COND_PL: - os << "pl"; - break; - case COND_VS: - os << "vs"; - break; - case COND_VC: - os << "vc"; - break; - case COND_HI: - os << "hi"; - break; - case COND_LS: - os << "ls"; - break; - case COND_GE: - os << "ge"; - break; - case COND_LT: - os << "lt"; - break; - case COND_GT: - os << "gt"; - break; - case COND_LE: - os << "le"; - break; - case COND_AL: - // This one is implicit. - break; - case COND_UC: - // Unconditional. - break; - default: - panic("Unrecognized condition code %d.\n", condCode); - } + if (withPred && !aarch64) { + printCondition(os, machInst.condCode); os << suffix; - if (machInst.bigThumb) - os << ".w"; - os << " "; + } else if (withCond64) { + os << "."; + printCondition(os, cond64); + os << suffix; + } + if (machInst.bigThumb) + os << ".w"; + os << " "; +} + +void +ArmStaticInst::printTarget(std::ostream &os, Addr target, + const SymbolTable *symtab) const +{ + Addr symbolAddr; + std::string symbol; + + if (symtab && symtab->findNearestSymbol(target, symbol, symbolAddr)) { + ccprintf(os, "<%s", symbol); + if (symbolAddr != target) + ccprintf(os, "+%d>", target - symbolAddr); + else + ccprintf(os, ">"); + } else { + ccprintf(os, "%#x", target); + } +} + +void +ArmStaticInst::printCondition(std::ostream &os, + unsigned code, + bool noImplicit) const +{ + switch (code) { + case COND_EQ: + os << "eq"; + break; + case COND_NE: + os << "ne"; + break; + case COND_CS: + os << "cs"; + break; + case COND_CC: + os << "cc"; + break; + case COND_MI: + os << "mi"; + break; + case COND_PL: + os << "pl"; + break; + case COND_VS: + os << "vs"; + break; + case COND_VC: + os << "vc"; + break; + case COND_HI: + os << "hi"; + break; + case COND_LS: + os << "ls"; + break; + case COND_GE: + os << "ge"; + break; + case COND_LT: + os << "lt"; + break; + case COND_GT: + os << "gt"; + break; + case COND_LE: + os << "le"; + break; + case COND_AL: + // This one is implicit. + if (noImplicit) + os << "al"; + break; + case COND_UC: + // Unconditional. 
+ if (noImplicit) + os << "uc"; + break; + default: + panic("Unrecognized condition code %d.\n", code); } } @@ -392,6 +522,38 @@ ArmStaticInst::printShiftOperand(std::ostream &os, } } +void +ArmStaticInst::printExtendOperand(bool firstOperand, std::ostream &os, + IntRegIndex rm, ArmExtendType type, + int64_t shiftAmt) const +{ + if (!firstOperand) + ccprintf(os, ", "); + printReg(os, rm); + if (type == UXTX && shiftAmt == 0) + return; + switch (type) { + case UXTB: ccprintf(os, ", UXTB"); + break; + case UXTH: ccprintf(os, ", UXTH"); + break; + case UXTW: ccprintf(os, ", UXTW"); + break; + case UXTX: ccprintf(os, ", LSL"); + break; + case SXTB: ccprintf(os, ", SXTB"); + break; + case SXTH: ccprintf(os, ", SXTH"); + break; + case SXTW: ccprintf(os, ", SXTW"); + break; + case SXTX: ccprintf(os, ", SXTX"); + break; + } + if (type == UXTX || shiftAmt) + ccprintf(os, " #%d", shiftAmt); +} + void ArmStaticInst::printDataInst(std::ostream &os, bool withImm, bool immShift, bool s, IntRegIndex rd, IntRegIndex rn, diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh index c36024ecd..aeec67ec2 100644 --- a/src/arch/arm/insts/static_inst.hh +++ b/src/arch/arm/insts/static_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,6 +44,7 @@ #include "arch/arm/faults.hh" #include "arch/arm/utility.hh" +#include "arch/arm/system.hh" #include "base/trace.hh" #include "cpu/static_inst.hh" #include "sim/byteswap.hh" @@ -55,6 +56,9 @@ namespace ArmISA class ArmStaticInst : public StaticInst { protected: + bool aarch64; + uint8_t intWidth; + int32_t shift_rm_imm(uint32_t base, uint32_t shamt, uint32_t type, uint32_t cfval) const; int32_t shift_rm_rs(uint32_t base, uint32_t shamt, @@ -65,6 +69,11 @@ class ArmStaticInst : public StaticInst bool shift_carry_rs(uint32_t base, uint32_t shamt, uint32_t type, uint32_t cfval) const; + int64_t shiftReg64(uint64_t base, uint64_t shiftAmt, + ArmShiftType type, uint8_t width) const; + int64_t extendReg64(uint64_t base, ArmExtendType type, + uint64_t shiftAmt, uint8_t width) const; + template <int width> static inline bool saturateOp(int32_t &res, int64_t op1, int64_t op2, bool sub=false) @@ -135,6 +144,11 @@ class ArmStaticInst : public StaticInst OpClass __opClass) : StaticInst(mnem, _machInst, __opClass) { + aarch64 = machInst.aarch64; + if (bits(machInst, 28, 24) == 0x10) + intWidth = 64; // Force 64-bit width for ADR/ADRP + else + intWidth = (aarch64 && bits(machInst, 31)) ?
64 : 32; } /// Print a register name for disassembly given the unique @@ -142,13 +156,22 @@ class ArmStaticInst : public StaticInst void printReg(std::ostream &os, int reg) const; void printMnemonic(std::ostream &os, const std::string &suffix = "", - bool withPred = true) const; + bool withPred = true, + bool withCond64 = false, + ConditionCode cond64 = COND_UC) const; + void printTarget(std::ostream &os, Addr target, + const SymbolTable *symtab) const; + void printCondition(std::ostream &os, unsigned code, + bool noImplicit=false) const; void printMemSymbol(std::ostream &os, const SymbolTable *symtab, const std::string &prefix, const Addr addr, const std::string &suffix) const; void printShiftOperand(std::ostream &os, IntRegIndex rm, bool immShift, uint32_t shiftAmt, IntRegIndex rs, ArmShiftType type) const; + void printExtendOperand(bool firstOperand, std::ostream &os, + IntRegIndex rm, ArmExtendType type, + int64_t shiftAmt) const; void printDataInst(std::ostream &os, bool withImm) const; @@ -166,10 +189,13 @@ class ArmStaticInst : public StaticInst std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; static inline uint32_t - cpsrWriteByInstr(CPSR cpsr, uint32_t val, - uint8_t byteMask, bool affectState, bool nmfi) + cpsrWriteByInstr(CPSR cpsr, uint32_t val, SCR scr, NSACR nsacr, + uint8_t byteMask, bool affectState, bool nmfi, ThreadContext *tc) { - bool privileged = (cpsr.mode != MODE_USER); + bool privileged = (cpsr.mode != MODE_USER); + bool haveVirt = ArmSystem::haveVirtualization(tc); + bool haveSecurity = ArmSystem::haveSecurity(tc); + bool isSecure = inSecureState(scr, cpsr) || !haveSecurity; uint32_t bitMask = 0; @@ -182,14 +208,53 @@ class ArmStaticInst : public StaticInst } if (bits(byteMask, 1)) { unsigned highIdx = affectState ? 15 : 9; - unsigned lowIdx = privileged ? 8 : 9; + unsigned lowIdx = (privileged && (isSecure || scr.aw || haveVirt)) + ? 8 : 9; bitMask = bitMask | mask(highIdx, lowIdx); } if (bits(byteMask, 0)) { if (privileged) { - bitMask = bitMask | mask(7, 6); - if (!badMode((OperatingMode)(val & mask(5)))) { - bitMask = bitMask | mask(5); + bitMask |= 1 << 7; + if ( (!nmfi || !((val >> 6) & 0x1)) && + (isSecure || scr.fw || haveVirt) ) { + bitMask |= 1 << 6; + } + // Now check the new mode is allowed + OperatingMode newMode = (OperatingMode) (val & mask(5)); + OperatingMode oldMode = (OperatingMode)(uint32_t)cpsr.mode; + if (!badMode(newMode)) { + bool validModeChange = true; + // Check for attempts to enter modes only permitted in + // Secure state from Non-secure state. These are Monitor + // mode ('10110'), and FIQ mode ('10001') if the Security + // Extensions have reserved it. 
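+ // For example, a Non-secure PL1 write of mode '10110' (Monitor) fails the first check below: validModeChange is cleared, the mode bits stay out of bitMask, and the write updates the other unmasked CPSR fields while leaving CPSR.M unchanged.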
+ if (!isSecure && newMode == MODE_MON) + validModeChange = false; + if (!isSecure && newMode == MODE_FIQ && nsacr.rfr == 1) + validModeChange = false; + // There is no Hyp mode ('11010') in Secure state, so that + // is UNPREDICTABLE + if (scr.ns == 0 && newMode == MODE_HYP) + validModeChange = false; + // Cannot move into Hyp mode directly from a Non-secure + // PL1 mode + if (!isSecure && oldMode != MODE_HYP && newMode == MODE_HYP) + validModeChange = false; + // Cannot move out of Hyp mode with this function except + // on an exception return + if (oldMode == MODE_HYP && newMode != MODE_HYP && !affectState) + validModeChange = false; + // Must not change to 64 bit when running in 32 bit mode + if (!opModeIs64(oldMode) && opModeIs64(newMode)) + validModeChange = false; + + // If we passed all of the above then set the bit mask to + // copy the mode across + if (validModeChange) { + bitMask = bitMask | mask(5); + } else { + warn_once("Illegal change to CPSR mode attempted\n"); + } } else { warn_once("Ignoring write of bad mode to CPSR.\n"); } @@ -198,11 +263,7 @@ class ArmStaticInst : public StaticInst bitMask = bitMask | (1 << 5); } - bool cpsr_f = cpsr.f; - uint32_t new_cpsr = ((uint32_t)cpsr & ~bitMask) | (val & bitMask); - if (nmfi && !cpsr_f) - new_cpsr &= ~(1 << 6); - return new_cpsr; + return ((uint32_t)cpsr & ~bitMask) | (val & bitMask); } static inline uint32_t @@ -296,12 +357,12 @@ class ArmStaticInst : public StaticInst inline Fault disabledFault() const { - if (FullSystem) { - return new UndefinedInstruction(); - } else { - return new UndefinedInstruction(machInst, false, mnemonic, true); - } + return new UndefinedInstruction(machInst, false, mnemonic, true); } + + public: + virtual void + annotateFault(ArmFault *fault) {} }; } diff --git a/src/arch/arm/insts/vfp.cc b/src/arch/arm/insts/vfp.cc index ca0f58226..03fdc83fa 100644 --- a/src/arch/arm/insts/vfp.cc +++ b/src/arch/arm/insts/vfp.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -45,6 +45,37 @@ * exception bits read before it, etc.
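+ * The empty asm statements of the form __asm__ __volatile__("" : "=m" (val) : "m" (val)); used throughout these helpers force val through memory, giving the compiler an ordering point it cannot move floating point operations across.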
*/ +std::string +FpCondCompRegOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", #%d", defCc); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + +std::string +FpCondSelOp::generateDisassembly( + Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printReg(ss, dest); + ccprintf(ss, ", "); + printReg(ss, op1); + ccprintf(ss, ", "); + printReg(ss, op2); + ccprintf(ss, ", "); + printCondition(ss, condCode, true); + return ss.str(); +} + std::string FpRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { @@ -91,6 +122,21 @@ FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const return ss.str(); } +std::string +FpRegRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest + FP_Reg_Base); + ss << ", "; + printReg(ss, op1 + FP_Reg_Base); + ss << ", "; + printReg(ss, op2 + FP_Reg_Base); + ss << ", "; + printReg(ss, op3 + FP_Reg_Base); + return ss.str(); +} + std::string FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const { @@ -131,24 +177,25 @@ prepFpState(uint32_t rMode) } void -finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush) +finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask) { int exceptions = fetestexcept(FeAllExceptions); bool underflow = false; - if (exceptions & FeInvalid) { + if ((exceptions & FeInvalid) && mask.ioc) { fpscr.ioc = 1; } - if (exceptions & FeDivByZero) { + if ((exceptions & FeDivByZero) && mask.dzc) { fpscr.dzc = 1; } - if (exceptions & FeOverflow) { + if ((exceptions & FeOverflow) && mask.ofc) { fpscr.ofc = 1; } if (exceptions & FeUnderflow) { underflow = true; - fpscr.ufc = 1; + if (mask.ufc) + fpscr.ufc = 1; } - if ((exceptions & FeInexact) && !(underflow && flush)) { + if ((exceptions & FeInexact) && !(underflow && flush) && mask.ixc) { fpscr.ixc = 1; } fesetround(state); @@ -329,19 +376,33 @@ fixFpSFpDDest(FPSCR fpscr, float val) return mid; } -uint16_t -vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, - uint32_t rMode, bool ahp, float op) +static inline uint16_t +vcvtFpFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, uint64_t opBits, bool isDouble) { - uint32_t opBits = fpToBits(op); + uint32_t mWidth; + uint32_t eWidth; + uint32_t eHalfRange; + uint32_t sBitPos; + + if (isDouble) { + mWidth = 52; + eWidth = 11; + } else { + mWidth = 23; + eWidth = 8; + } + sBitPos = eWidth + mWidth; + eHalfRange = (1 << (eWidth-1)) - 1; + // Extract the operand. - bool neg = bits(opBits, 31); - uint32_t exponent = bits(opBits, 30, 23); - uint32_t oldMantissa = bits(opBits, 22, 0); - uint32_t mantissa = oldMantissa >> (23 - 10); + bool neg = bits(opBits, sBitPos); + uint32_t exponent = bits(opBits, sBitPos-1, mWidth); + uint64_t oldMantissa = bits(opBits, mWidth-1, 0); + uint32_t mantissa = oldMantissa >> (mWidth - 10); // Do the conversion. - uint32_t extra = oldMantissa & mask(23 - 10); - if (exponent == 0xff) { + uint64_t extra = oldMantissa & mask(mWidth - 10); + if (exponent == mask(eWidth)) { if (oldMantissa != 0) { // Nans. if (bits(mantissa, 9) == 0) { @@ -379,7 +440,6 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, if (exponent == 0) { // Denormalized. - // If flush to zero is on, this shouldn't happen. 
assert(!flush); @@ -407,13 +467,13 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, // We need to track the dropped bits differently since // more can be dropped by denormalizing. - bool topOne = bits(extra, 12); - bool restZeros = bits(extra, 11, 0) == 0; + bool topOne = bits(extra, mWidth - 10 - 1); + bool restZeros = bits(extra, mWidth - 10 - 2, 0) == 0; - if (exponent <= (127 - 15)) { + if (exponent <= (eHalfRange - 15)) { // The result is too small. Denormalize. mantissa |= (1 << 10); - while (mantissa && exponent <= (127 - 15)) { + while (mantissa && exponent <= (eHalfRange - 15)) { restZeros = restZeros && !topOne; topOne = bits(mantissa, 0); mantissa = mantissa >> 1; @@ -424,7 +484,7 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, exponent = 0; } else { // Change bias. - exponent -= (127 - 15); + exponent -= (eHalfRange - 15); } if (exponent == 0 && (inexact || fpscr.ufe)) { @@ -488,155 +548,115 @@ vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, return result; } -float -vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) +uint16_t +vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, float op) { - float junk = 0.0; + uint64_t opBits = fpToBits(op); + return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, false); +} + +uint16_t +vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, double op) +{ + uint64_t opBits = fpToBits(op); + return vcvtFpFpH(fpscr, flush, defaultNan, rMode, ahp, opBits, true); +} + +static inline uint64_t +vcvtFpHFp(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op, bool isDouble) +{ + uint32_t mWidth; + uint32_t eWidth; + uint32_t eHalfRange; + uint32_t sBitPos; + + if (isDouble) { + mWidth = 52; + eWidth = 11; + } else { + mWidth = 23; + eWidth = 8; + } + sBitPos = eWidth + mWidth; + eHalfRange = (1 << (eWidth-1)) - 1; + // Extract the bitfields. bool neg = bits(op, 15); uint32_t exponent = bits(op, 14, 10); - uint32_t mantissa = bits(op, 9, 0); + uint64_t mantissa = bits(op, 9, 0); // Do the conversion. if (exponent == 0) { if (mantissa != 0) { // Normalize the value. - exponent = exponent + (127 - 15) + 1; + exponent = exponent + (eHalfRange - 15) + 1; while (mantissa < (1 << 10)) { mantissa = mantissa << 1; exponent--; } } - mantissa = mantissa << (23 - 10); + mantissa = mantissa << (mWidth - 10); } else if (exponent == 0x1f && !ahp) { // Infinities and nans. - exponent = 0xff; + exponent = mask(eWidth); if (mantissa != 0) { // Nans. - mantissa = mantissa << (23 - 10); - if (bits(mantissa, 22) == 0) { + mantissa = mantissa << (mWidth - 10); + if (bits(mantissa, mWidth-1) == 0) { // Signalling nan. fpscr.ioc = 1; - mantissa |= (1 << 22); + mantissa |= (((uint64_t) 1) << (mWidth-1)); } if (defaultNan) { - mantissa &= ~mask(22); + mantissa &= ~mask(mWidth-1); neg = false; } } } else { - exponent = exponent + (127 - 15); - mantissa = mantissa << (23 - 10); + exponent = exponent + (eHalfRange - 15); + mantissa = mantissa << (mWidth - 10); } // Reassemble the result. 
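// e.g. converting the half-precision value 0xc000 (-2.0) to double: neg == 1, the exponent becomes 16 + (1023 - 15) == 0x400 and the mantissa is 0, so the reassembled result below is 0xc000000000000000.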
- uint32_t result = bits(mantissa, 22, 0); - replaceBits(result, 30, 23, exponent); - if (neg) - result |= (1 << 31); + uint64_t result = bits(mantissa, mWidth-1, 0); + replaceBits(result, sBitPos-1, mWidth, exponent); + if (neg) { + result |= (((uint64_t) 1) << sBitPos); + } + return result; +} + +double +vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) +{ + double junk = 0.0; + uint64_t result; + + result = vcvtFpHFp(fpscr, defaultNan, ahp, op, true); return bitsToFp(result, junk); } -uint64_t -vfpFpSToFixed(float val, bool isSigned, bool half, - uint8_t imm, bool rzero) +float +vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op) { - int rmode = rzero ? FeRoundZero : fegetround(); - __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); - fesetround(FeRoundNearest); - val = val * powf(2.0, imm); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - fesetround(rmode); - feclearexcept(FeAllExceptions); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - float origVal = val; - val = rintf(val); - int fpType = std::fpclassify(val); - if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { - if (fpType == FP_NAN) { - feraiseexcept(FeInvalid); - } - val = 0.0; - } else if (origVal != val) { - switch (rmode) { - case FeRoundNearest: - if (origVal - val > 0.5) - val += 1.0; - else if (val - origVal > 0.5) - val -= 1.0; - break; - case FeRoundDown: - if (origVal < val) - val -= 1.0; - break; - case FeRoundUpward: - if (origVal > val) - val += 1.0; - break; - } - feraiseexcept(FeInexact); - } + float junk = 0.0; + uint64_t result; - if (isSigned) { - if (half) { - if ((double)val < (int16_t)(1 << 15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)(1 << 15); - } - if ((double)val > (int16_t)mask(15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)mask(15); - } - return (int16_t)val; - } else { - if ((double)val < (int32_t)(1 << 31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)(1 << 31); - } - if ((double)val > (int32_t)mask(31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)mask(31); - } - return (int32_t)val; - } - } else { - if (half) { - if ((double)val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if ((double)val > (mask(16))) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(16); - } - return (uint16_t)val; - } else { - if ((double)val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if ((double)val > (mask(32))) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(32); - } - return (uint32_t)val; - } - } + result = vcvtFpHFp(fpscr, defaultNan, ahp, op, false); + return bitsToFp(result, junk); } float vfpUFixedToFpS(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm) + uint64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = (uint16_t)val; + else if (width == 32) + val = (uint32_t)val; + else if (width != 64) + panic("Unsupported width %d", width); float scale = powf(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -646,11 +666,16 @@ vfpUFixedToFpS(bool flush, bool defaultNan, float vfpSFixedToFpS(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm) + int64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = sext<16>(val & mask(16)); + else if (width == 32) + 
val = sext<32>(val & mask(32)); + else if (width != 64) + panic("Unsupported width %d", width); + float scale = powf(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -658,106 +683,19 @@ vfpSFixedToFpS(bool flush, bool defaultNan, return fixDivDest(flush, defaultNan, val / scale, (float)val, scale); } -uint64_t -vfpFpDToFixed(double val, bool isSigned, bool half, - uint8_t imm, bool rzero) -{ - int rmode = rzero ? FeRoundZero : fegetround(); - fesetround(FeRoundNearest); - val = val * pow(2.0, imm); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - fesetround(rmode); - feclearexcept(FeAllExceptions); - __asm__ __volatile__("" : "=m" (val) : "m" (val)); - double origVal = val; - val = rint(val); - int fpType = std::fpclassify(val); - if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { - if (fpType == FP_NAN) { - feraiseexcept(FeInvalid); - } - val = 0.0; - } else if (origVal != val) { - switch (rmode) { - case FeRoundNearest: - if (origVal - val > 0.5) - val += 1.0; - else if (val - origVal > 0.5) - val -= 1.0; - break; - case FeRoundDown: - if (origVal < val) - val -= 1.0; - break; - case FeRoundUpward: - if (origVal > val) - val += 1.0; - break; - } - feraiseexcept(FeInexact); - } - if (isSigned) { - if (half) { - if (val < (int16_t)(1 << 15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)(1 << 15); - } - if (val > (int16_t)mask(15)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int16_t)mask(15); - } - return (int16_t)val; - } else { - if (val < (int32_t)(1 << 31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)(1 << 31); - } - if (val > (int32_t)mask(31)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return (int32_t)mask(31); - } - return (int32_t)val; - } - } else { - if (half) { - if (val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if (val > mask(16)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(16); - } - return (uint16_t)val; - } else { - if (val < 0) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return 0; - } - if (val > mask(32)) { - feraiseexcept(FeInvalid); - feclearexcept(FeInexact); - return mask(32); - } - return (uint32_t)val; - } - } -} double vfpUFixedToFpD(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm) + uint64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = (uint16_t)val; + else if (width == 32) + val = (uint32_t)val; + else if (width != 64) + panic("Unsupported width %d", width); + double scale = pow(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -767,11 +705,16 @@ vfpUFixedToFpD(bool flush, bool defaultNan, double vfpSFixedToFpD(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm) + int64_t val, uint8_t width, uint8_t imm) { fesetround(FeRoundNearest); - if (half) + if (width == 16) val = sext<16>(val & mask(16)); + else if (width == 32) + val = sext<32>(val & mask(32)); + else if (width != 64) + panic("Unsupported width %d", width); + double scale = pow(2.0, imm); __asm__ __volatile__("" : "=m" (scale) : "m" (scale)); feclearexcept(FeAllExceptions); @@ -976,6 +919,85 @@ template double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan, double op1, double op2) const; +// @TODO remove this function when we've finished switching all FMA code to use the new FPLIB +template +fpType 
+FpOp::ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, + fpType (*func)(fpType, fpType, fpType), + bool flush, bool defaultNan, uint32_t rMode) const +{ + const bool single = (sizeof(fpType) == sizeof(float)); + fpType junk = 0.0; + + if (flush && (flushToZero(op1, op2) || flushToZero(op3))) + fpscr.idc = 1; + VfpSavedState state = prepFpState(rMode); + __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3), "=m" (state) + : "m" (op1), "m" (op2), "m" (op3), "m" (state)); + fpType dest = func(op1, op2, op3); + __asm__ __volatile__ ("" : "=m" (dest) : "m" (dest)); + + int fpClass = std::fpclassify(dest); + // Get NAN behavior right. This varies between x86 and ARM. + if (fpClass == FP_NAN) { + const uint64_t qnan = + single ? 0x7fc00000 : ULL(0x7ff8000000000000); + const bool nan1 = std::isnan(op1); + const bool nan2 = std::isnan(op2); + const bool nan3 = std::isnan(op3); + const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan); + const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan); + const bool signal3 = nan3 && ((fpToBits(op3) & qnan) != qnan); + if ((!nan1 && !nan2 && !nan3) || (defaultNan == 1)) { + dest = bitsToFp(qnan, junk); + } else if (signal1) { + dest = bitsToFp(fpToBits(op1) | qnan, junk); + } else if (signal2) { + dest = bitsToFp(fpToBits(op2) | qnan, junk); + } else if (signal3) { + dest = bitsToFp(fpToBits(op3) | qnan, junk); + } else if (nan1) { + dest = op1; + } else if (nan2) { + dest = op2; + } else if (nan3) { + dest = op3; + } + } else if (flush && flushToZero(dest)) { + feraiseexcept(FeUnderflow); + } else if (( + (single && (dest == bitsToFp(0x00800000, junk) || + dest == bitsToFp(0x80800000, junk))) || + (!single && + (dest == bitsToFp(ULL(0x0010000000000000), junk) || + dest == bitsToFp(ULL(0x8010000000000000), junk))) + ) && rMode != VfpRoundZero) { + /* + * Correct for the fact that underflow is detected -before- rounding + * in ARM and -after- rounding in x86. 
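+ * e.g. an exact product slightly below the minimum normal (0x00800000 for single precision) that rounds up to it underflows on ARM but not on x86; the round-to-zero re-run below recovers the pre-rounding magnitude so the flush can still be applied.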
+ */ + fesetround(FeRoundZero); + __asm__ __volatile__ ("" : "=m" (op1), "=m" (op2), "=m" (op3) + : "m" (op1), "m" (op2), "m" (op3)); + fpType temp = func(op1, op2, op2); + __asm__ __volatile__ ("" : "=m" (temp) : "m" (temp)); + if (flush && flushToZero(temp)) { + dest = temp; + } + } + finishVfp(fpscr, state, flush); + return dest; +} + +template +float FpOp::ternaryOp(FPSCR &fpscr, float op1, float op2, float op3, + float (*func)(float, float, float), + bool flush, bool defaultNan, uint32_t rMode) const; +template +double FpOp::ternaryOp(FPSCR &fpscr, double op1, double op2, double op3, + double (*func)(double, double, double), + bool flush, bool defaultNan, uint32_t rMode) const; + template fpType FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2, diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh index 9babaae04..f17f90973 100644 --- a/src/arch/arm/insts/vfp.hh +++ b/src/arch/arm/insts/vfp.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -104,7 +104,8 @@ enum VfpRoundingMode VfpRoundNearest = 0, VfpRoundUpward = 1, VfpRoundDown = 2, - VfpRoundZero = 3 + VfpRoundZero = 3, + VfpRoundAway = 4 }; static inline float bitsToFp(uint64_t, float); @@ -212,7 +213,7 @@ isSnan(fpType val) typedef int VfpSavedState; VfpSavedState prepFpState(uint32_t rMode); -void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush); +void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask); template fpType fixDest(FPSCR fpscr, fpType val, fpType op1); @@ -228,7 +229,11 @@ double fixFpSFpDDest(FPSCR fpscr, float val); uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan, uint32_t rMode, bool ahp, float op); -float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); +uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan, + uint32_t rMode, bool ahp, double op); + +float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); +double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op); static inline double makeDouble(uint32_t low, uint32_t high) @@ -249,19 +254,192 @@ highFromDouble(double val) return fpToBits(val) >> 32; } -uint64_t vfpFpSToFixed(float val, bool isSigned, bool half, - uint8_t imm, bool rzero = true); -float vfpUFixedToFpS(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm); -float vfpSFixedToFpS(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm); +static inline void +setFPExceptions(int exceptions) { + feclearexcept(FeAllExceptions); + feraiseexcept(exceptions); +} + +template +uint64_t +vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool + useRmode = true, VfpRoundingMode roundMode = VfpRoundZero, + bool aarch64 = false) +{ + int rmode; + bool roundAwayFix = false; + + if (!useRmode) { + rmode = fegetround(); + } else { + switch (roundMode) + { + case VfpRoundNearest: + rmode = FeRoundNearest; + break; + case VfpRoundUpward: + rmode = FeRoundUpward; + break; + case VfpRoundDown: + rmode = FeRoundDown; + break; + case VfpRoundZero: + rmode = FeRoundZero; + break; + case VfpRoundAway: + // There is no equivalent rounding mode, use round down and we'll + // fix it later + rmode = FeRoundDown; + roundAwayFix = true; + break; + default: + panic("Unsupported roundMode %d\n", roundMode); + } + } + __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode)); + fesetround(FeRoundNearest); + val = val * pow(2.0, 
imm); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + fesetround(rmode); + feclearexcept(FeAllExceptions); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + T origVal = val; + val = rint(val); + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + + int exceptions = fetestexcept(FeAllExceptions); + + int fpType = std::fpclassify(val); + if (fpType == FP_SUBNORMAL || fpType == FP_NAN) { + if (fpType == FP_NAN) { + exceptions |= FeInvalid; + } + val = 0.0; + } else if (origVal != val) { + switch (rmode) { + case FeRoundNearest: + if (origVal - val > 0.5) + val += 1.0; + else if (val - origVal > 0.5) + val -= 1.0; + break; + case FeRoundDown: + if (roundAwayFix) { + // The ordering on the subtraction looks a bit odd in that we + // don't do the obvious origVal - val, instead we do + // -(val - origVal). This is required to get the correct bit + // exact behaviour when very close to the 0.5 threshold. + volatile T error = val; + error -= origVal; + error = -error; + if ( (error > 0.5) || + ((error == 0.5) && (val >= 0)) ) + val += 1.0; + } else { + if (origVal < val) + val -= 1.0; + } + break; + case FeRoundUpward: + if (origVal > val) + val += 1.0; + break; + } + exceptions |= FeInexact; + } + + __asm__ __volatile__("" : "=m" (val) : "m" (val)); + + if (isSigned) { + bool outOfRange = false; + int64_t result = (int64_t) val; + uint64_t finalVal; + + if (!aarch64) { + if (width == 16) { + finalVal = (int16_t)val; + } else if (width == 32) { + finalVal = (int32_t)val; + } else if (width == 64) { + finalVal = result; + } else { + panic("Unsupported width %d\n", width); + } + + // check if value is in range + int64_t minVal = ~mask(width-1); + if ((double)val < minVal) { + outOfRange = true; + finalVal = minVal; + } + int64_t maxVal = mask(width-1); + if ((double)val > maxVal) { + outOfRange = true; + finalVal = maxVal; + } + } else { + bool isNeg = val < 0; + finalVal = result & mask(width); + // If the result is supposed to be less than 64 bits check that the + // upper bits that got thrown away are just sign extension bits + if (width != 64) { + outOfRange = ((uint64_t) result >> (width - 1)) != + (isNeg ? mask(64-width+1) : 0); + } + // If the original floating point value doesn't match the integer + // version, we are also out of range, so create a saturated + // result.
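+ // e.g. converting 70000.0f to a signed 16-bit result: the bits of result above bit 15 are not pure sign extension, so outOfRange is set and finalVal saturates to mask(15) == 0x7fff, with FeInvalid raised in place of FeInexact.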
+ if (isNeg) { + outOfRange |= val < result; + if (outOfRange) { + finalVal = 1LL << (width-1); + } + } else { + outOfRange |= val > result; + if (outOfRange) { + finalVal = mask(width-1); + } + } + } + + // Raise an exception if the value was out of range + if (outOfRange) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + } + setFPExceptions(exceptions); + return finalVal; + } else { + if ((double)val < 0) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + setFPExceptions(exceptions); + return 0; + } + + uint64_t result = ((uint64_t) val) & mask(width); + if (val > result) { + exceptions |= FeInvalid; + exceptions &= ~FeInexact; + setFPExceptions(exceptions); + return mask(width); + } + + setFPExceptions(exceptions); + return result; + } +}; + + +float vfpUFixedToFpS(bool flush, bool defaultNan, + uint64_t val, uint8_t width, uint8_t imm); +float vfpSFixedToFpS(bool flush, bool defaultNan, + int64_t val, uint8_t width, uint8_t imm); -uint64_t vfpFpDToFixed(double val, bool isSigned, bool half, - uint8_t imm, bool rzero = true); double vfpUFixedToFpD(bool flush, bool defaultNan, - uint32_t val, bool half, uint8_t imm); + uint64_t val, uint8_t width, uint8_t imm); double vfpSFixedToFpD(bool flush, bool defaultNan, - int32_t val, bool half, uint8_t imm); + int64_t val, uint8_t width, uint8_t imm); float fprSqrtEstimate(FPSCR &fpscr, float op); uint32_t unsignedRSqrtEstimate(uint32_t op); @@ -292,6 +470,20 @@ class VfpMacroOp : public PredMacroOp void nextIdxs(IntRegIndex &dest); }; +template <typename T> +static inline T +fpAdd(T a, T b) +{ + return a + b; +}; + +template <typename T> +static inline T +fpSub(T a, T b) +{ + return a - b; +}; + static inline float fpAddS(float a, float b) { @@ -328,6 +520,54 @@ fpDivD(double a, double b) return a / b; } +template <typename T> +static inline T +fpDiv(T a, T b) +{ + return a / b; +}; + +template <typename T> +static inline T +fpMulX(T a, T b) +{ + uint64_t opData; + uint32_t sign1; + uint32_t sign2; + const bool single = (sizeof(T) == sizeof(float)); + if (single) { + opData = (fpToBits(a)); + sign1 = opData >> 31; + opData = (fpToBits(b)); + sign2 = opData >> 31; + } else { + opData = (fpToBits(a)); + sign1 = opData >> 63; + opData = (fpToBits(b)); + sign2 = opData >> 63; + } + bool inf1 = (std::fpclassify(a) == FP_INFINITE); + bool inf2 = (std::fpclassify(b) == FP_INFINITE); + bool zero1 = (std::fpclassify(a) == FP_ZERO); + bool zero2 = (std::fpclassify(b) == FP_ZERO); + if ((inf1 && zero2) || (zero1 && inf2)) { + if (sign1 ^ sign2) + return (T)(-2.0); + else + return (T)(2.0); + } else { + return (a * b); + } +}; + + +template <typename T> +static inline T +fpMul(T a, T b) +{ + return a * b; +}; + static inline float fpMulS(float a, float b) { @@ -340,23 +580,140 @@ fpMulD(double a, double b) return a * b; } -static inline float -fpMaxS(float a, float b) +// @todo remove this when all calls to it have been replaced with the new fplib implementation +template <typename T> +static inline T +fpMulAdd(T op1, T op2, T addend) { + T result; + + if (sizeof(T) == sizeof(float)) + result = fmaf(op1, op2, addend); + else + result = fma(op1, op2, addend); + + // ARM doesn't generate signed NaNs from this operation, so fix up the result + if (std::isnan(result) && !std::isnan(op1) && + !std::isnan(op2) && !std::isnan(addend)) + { + uint64_t bitMask = ULL(0x1) << ((sizeof(T) * 8) - 1); + result = bitsToFp(fpToBits(result) & ~bitMask, op1); + } + return result; +} + +template <typename T> +static inline T +fpRIntX(T a, FPSCR &fpscr) +{ + T rVal; + + rVal = rint(a); + if (rVal != a && !std::isnan(a)) + fpscr.ixc = 1; + return (rVal);
+}; + +template <typename T> +static inline T +fpMaxNum(T a, T b) +{ + const bool single = (sizeof(T) == sizeof(float)); + const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); + + if (std::isnan(a)) + return ((fpToBits(a) & qnan) == qnan) ? b : a; + if (std::isnan(b)) + return ((fpToBits(b) & qnan) == qnan) ? a : b; // Handle comparisons of +0 and -0. if (!std::signbit(a) && std::signbit(b)) return a; - return fmaxf(a, b); -} + return fmax(a, b); +}; -static inline float -fpMinS(float a, float b) +template <typename T> +static inline T +fpMax(T a, T b) { + if (std::isnan(a)) + return a; + if (std::isnan(b)) + return b; + return fpMaxNum(a, b); +}; + +template <typename T> +static inline T +fpMinNum(T a, T b) +{ + const bool single = (sizeof(T) == sizeof(float)); + const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); + + if (std::isnan(a)) + return ((fpToBits(a) & qnan) == qnan) ? b : a; + if (std::isnan(b)) + return ((fpToBits(b) & qnan) == qnan) ? a : b; // Handle comparisons of +0 and -0. if (std::signbit(a) && !std::signbit(b)) return a; - return fminf(a, b); -} + return fmin(a, b); +}; + +template <typename T> +static inline T +fpMin(T a, T b) +{ + if (std::isnan(a)) + return a; + if (std::isnan(b)) + return b; + return fpMinNum(a, b); +}; + +template <typename T> +static inline T +fpRSqrts(T a, T b) +{ + int fpClassA = std::fpclassify(a); + int fpClassB = std::fpclassify(b); + T aXb; + int fpClassAxB; + + if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) || + (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) { + return 1.5; + } + aXb = a * b; + fpClassAxB = std::fpclassify(aXb); + if (fpClassAxB == FP_SUBNORMAL) { + feraiseexcept(FeUnderflow); + return 1.5; + } + return (3.0 - (a * b)) / 2.0; +}; + +template <typename T> +static inline T +fpRecps(T a, T b) +{ + int fpClassA = std::fpclassify(a); + int fpClassB = std::fpclassify(b); + T aXb; + int fpClassAxB; + + if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) || + (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) { + return 2.0; + } + aXb = a * b; + fpClassAxB = std::fpclassify(aXb); + if (fpClassAxB == FP_SUBNORMAL) { + feraiseexcept(FeUnderflow); + return 2.0; + } + return 2.0 - (a * b); +}; + static inline float fpRSqrtsS(float a, float b) @@ -400,6 +757,23 @@ fpRecpsS(float a, float b) return 2.0 - (a * b); } +template <typename T> +static inline T +roundNEven(T a) { + T val; + + val = round(a); + if (a - val == 0.5) { + if ((((int) a) & 1) == 0) val += 1.0; + } + else if (a - val == -0.5) { + if ((((int) a) & 1) == 0) val -= 1.0; + } + return val; +} + + + class FpOp : public PredOp { protected: @@ -455,6 +829,12 @@ class FpOp : public PredOp processNans(FPSCR &fpscr, bool &done, bool defaultNan, fpType op1, fpType op2) const; + template <class fpType> + fpType + ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3, + fpType (*func)(fpType, fpType, fpType), + bool flush, bool defaultNan, uint32_t rMode) const; + template <class fpType> fpType binaryOp(FPSCR &fpscr, fpType op1, fpType op2, @@ -478,6 +858,55 @@ class FpOp : public PredOp pcState.advance(); } } + + float + fpSqrt(FPSCR fpscr, float x) const + { + return unaryOp(fpscr, x, sqrtf, fpscr.fz, fpscr.rMode); + } + + double + fpSqrt(FPSCR fpscr, double x) const + { + return unaryOp(fpscr, x, sqrt, fpscr.fz, fpscr.rMode); + } +}; + +class FpCondCompRegOp : public FpOp +{ + protected: + IntRegIndex op1, op2; + ConditionCode condCode; + uint8_t defCc; + + FpCondCompRegOp(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode, uint8_t _defCc) : + FpOp(mnem, _machInst, __opClass),
op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + +class FpCondSelOp : public FpOp +{ + protected: + IntRegIndex dest, op1, op2; + ConditionCode condCode; + + FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + ConditionCode _condCode) : + FpOp(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2), condCode(_condCode) + {} + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; class FpRegRegOp : public FpOp @@ -550,6 +979,26 @@ class FpRegRegRegOp : public FpOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class FpRegRegRegRegOp : public FpOp +{ + protected: + IntRegIndex dest; + IntRegIndex op1; + IntRegIndex op2; + IntRegIndex op3; + + FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2, + IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop) : + FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2), + op3(_op3) + { + setVfpMicroFlags(mode, flags); + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + class FpRegRegRegImmOp : public FpOp { protected: diff --git a/src/arch/arm/interrupts.cc b/src/arch/arm/interrupts.cc index c05ae984e..6682b75a0 100644 --- a/src/arch/arm/interrupts.cc +++ b/src/arch/arm/interrupts.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 ARM Limited + * Copyright (c) 2009, 2012-2013 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -38,9 +38,128 @@ */ #include "arch/arm/interrupts.hh" +#include "arch/arm/system.hh" ArmISA::Interrupts * ArmInterruptsParams::create() { return new ArmISA::Interrupts(this); } + +bool +ArmISA::Interrupts::takeInt(ThreadContext *tc, InterruptTypes int_type) const +{ + // Table G1-17~19 of ARM V8 ARM + InterruptMask mask; + bool highest_el_is_64 = ArmSystem::highestELIs64(tc); + + CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr; + HCR hcr; + hcr = tc->readMiscReg(MISCREG_HCR); + ExceptionLevel el = (ExceptionLevel) ((uint32_t) cpsr.el); + bool cpsr_mask_bit, scr_routing_bit, scr_fwaw_bit, hcr_mask_override_bit; + + if (!highest_el_is_64) + scr = tc->readMiscReg(MISCREG_SCR); + else + scr = tc->readMiscReg(MISCREG_SCR_EL3); + + bool is_secure = inSecureState(scr, cpsr); + + switch(int_type) { + case INT_FIQ: + cpsr_mask_bit = cpsr.f; + scr_routing_bit = scr.fiq; + scr_fwaw_bit = scr.fw; + hcr_mask_override_bit = hcr.fmo; + break; + case INT_IRQ: + cpsr_mask_bit = cpsr.i; + scr_routing_bit = scr.irq; + scr_fwaw_bit = 1; + hcr_mask_override_bit = hcr.imo; + break; + case INT_ABT: + cpsr_mask_bit = cpsr.a; + scr_routing_bit = scr.ea; + scr_fwaw_bit = scr.aw; + hcr_mask_override_bit = hcr.amo; + break; + default: + panic("Unhandled interrupt type!"); + } + + if (hcr.tge) + hcr_mask_override_bit = 1; + + if (!highest_el_is_64) { + // AArch32 + if (!scr_routing_bit) { + // SCR IRQ == 0 + if (!hcr_mask_override_bit) + mask = INT_MASK_M; + else { + if (!is_secure && (el == EL0 || el == EL1)) + mask = INT_MASK_T; + else + mask = INT_MASK_M; + } + } else { + // SCR IRQ == 1 + if ((!is_secure) && + (hcr_mask_override_bit || + (!scr_fwaw_bit && !hcr_mask_override_bit))) + mask = INT_MASK_T; + else + mask = INT_MASK_M; + } + } else { + // AArch64 + if (!scr_routing_bit) { + // SCR IRQ == 0 + if (!scr.rw) { + // SCR RW 
== 0 + if (!hcr_mask_override_bit) { + if (el == EL3) + mask = INT_MASK_P; + else + mask = INT_MASK_M; + } else { + if (el == EL3) + mask = INT_MASK_T; + else if (is_secure || el == EL2) + mask = INT_MASK_M; + else + mask = INT_MASK_T; + } + } else { + // SCR RW == 1 + if (!hcr_mask_override_bit) { + if (el == EL3 || el == EL2) + mask = INT_MASK_P; + else + mask = INT_MASK_M; + } else { + if (el == EL3) + mask = INT_MASK_P; + else if (is_secure || el == EL2) + mask = INT_MASK_M; + else + mask = INT_MASK_T; + } + } + } else { + // SCR IRQ == 1 + if (el == EL3) + mask = INT_MASK_M; + else + mask = INT_MASK_T; + } + } + + return ((mask == INT_MASK_T) || + ((mask == INT_MASK_M) && !cpsr_mask_bit)) && + (mask != INT_MASK_P); +} + diff --git a/src/arch/arm/interrupts.hh b/src/arch/arm/interrupts.hh index 7def6ddd6..8e6c2b261 100644 --- a/src/arch/arm/interrupts.hh +++ b/src/arch/arm/interrupts.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010,2012 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -47,6 +47,7 @@ #include "arch/arm/isa_traits.hh" #include "arch/arm/miscregs.hh" #include "arch/arm/registers.hh" +#include "arch/arm/utility.hh" #include "cpu/thread_context.hh" #include "debug/Interrupt.hh" #include "params/ArmInterrupts.hh" @@ -123,31 +124,79 @@ class Interrupts : public SimObject memset(interrupts, 0, sizeof(interrupts)); } + enum InterruptMask { + INT_MASK_M, // masked (subject to PSTATE.{A,I,F} mask bit + INT_MASK_T, // taken regardless of mask + INT_MASK_P // pending + }; + + bool takeInt(ThreadContext *tc, InterruptTypes int_type) const; + bool checkInterrupts(ThreadContext *tc) const { - if (!intStatus) + HCR hcr = tc->readMiscReg(MISCREG_HCR); + + if (!(intStatus || hcr.va || hcr.vi || hcr.vf)) return false; CPSR cpsr = tc->readMiscReg(MISCREG_CPSR); + SCR scr = tc->readMiscReg(MISCREG_SCR); - return ((interrupts[INT_IRQ] && !cpsr.i) || - (interrupts[INT_FIQ] && !cpsr.f) || - (interrupts[INT_ABT] && !cpsr.a) || - (interrupts[INT_RST]) || - (interrupts[INT_SEV])); + bool isHypMode = cpsr.mode == MODE_HYP; + bool isSecure = inSecureState(scr, cpsr); + bool allowVIrq = !cpsr.i && hcr.imo && !isSecure && !isHypMode; + bool allowVFiq = !cpsr.f && hcr.fmo && !isSecure && !isHypMode; + bool allowVAbort = !cpsr.a && hcr.amo && !isSecure && !isHypMode; + + bool take_irq = takeInt(tc, INT_IRQ); + bool take_fiq = takeInt(tc, INT_FIQ); + bool take_ea = takeInt(tc, INT_ABT); + + return ((interrupts[INT_IRQ] && take_irq) || + (interrupts[INT_FIQ] && take_fiq) || + (interrupts[INT_ABT] && take_ea) || + ((interrupts[INT_VIRT_IRQ] || hcr.vi) && allowVIrq) || + ((interrupts[INT_VIRT_FIQ] || hcr.vf) && allowVFiq) || + (hcr.va && allowVAbort) || + (interrupts[INT_RST]) || + (interrupts[INT_SEV]) + ); } /** - * Check the raw interrupt state. * This function is used to check if a wfi operation should sleep. If there * is an interrupt pending, even if it's masked, wfi doesn't sleep. 
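The routing tables above collapse into three outcomes per interrupt type, which the final return statement of takeInt() then resolves. A condensed restatement of that last step (illustrative, not the gem5 code):

    // INT_MASK_T: taken regardless of the PSTATE.{A,I,F} mask bit.
    // INT_MASK_M: taken only if the corresponding PSTATE bit is clear.
    // INT_MASK_P: left pending at this exception level.
    enum InterruptMask { INT_MASK_M, INT_MASK_T, INT_MASK_P };

    static bool
    interruptTaken(InterruptMask mask, bool cpsr_mask_bit)
    {
        return (mask == INT_MASK_T) ||
               ((mask == INT_MASK_M) && !cpsr_mask_bit);
    }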
 * @return any interrupts pending
      */
     bool
-    checkRaw() const
+    checkWfiWake(HCR hcr, CPSR cpsr, SCR scr) const
     {
-        return intStatus;
+        uint64_t maskedIntStatus;
+        bool virtWake;
+
+        maskedIntStatus = intStatus & ~((1 << INT_VIRT_IRQ) |
+                                        (1 << INT_VIRT_FIQ));
+        virtWake  = (hcr.vi || interrupts[INT_VIRT_IRQ]) && hcr.imo;
+        virtWake |= (hcr.vf || interrupts[INT_VIRT_FIQ]) && hcr.fmo;
+        virtWake |= hcr.va && hcr.amo;
+        virtWake &= (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr);
+        return maskedIntStatus || virtWake;
+    }
+
+    uint32_t
+    getISR(HCR hcr, CPSR cpsr, SCR scr)
+    {
+        bool useHcrMux;
+        CPSR isr = 0; // ARM ARM states ISR reg uses same bit positions as CPSR
+
+        useHcrMux = (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr);
+        isr.i = (useHcrMux & hcr.imo) ? (interrupts[INT_VIRT_IRQ] || hcr.vi)
+                                      : interrupts[INT_IRQ];
+        isr.f = (useHcrMux & hcr.fmo) ? (interrupts[INT_VIRT_FIQ] || hcr.vf)
+                                      : interrupts[INT_FIQ];
+        isr.a = (useHcrMux & hcr.amo) ? hcr.va : interrupts[INT_ABT];
+        return isr;
     }
 
     /**
@@ -172,22 +221,45 @@ class Interrupts : public SimObject
     Fault
     getInterrupt(ThreadContext *tc)
     {
-        if (!intStatus)
+        HCR hcr = tc->readMiscReg(MISCREG_HCR);
+        CPSR cpsr = tc->readMiscReg(MISCREG_CPSR);
+        SCR scr = tc->readMiscReg(MISCREG_SCR);
+
+        // Calculate a few temp vars so we can work out if there's a pending
+        // virtual interrupt, and if it's allowed to happen
+        // ARM ARM Issue C section B1.9.9, B1.9.11, and B1.9.13
+        bool isHypMode   = cpsr.mode == MODE_HYP;
+        bool isSecure    = inSecureState(scr, cpsr);
+        bool allowVIrq   = !cpsr.i && hcr.imo && !isSecure && !isHypMode;
+        bool allowVFiq   = !cpsr.f && hcr.fmo && !isSecure && !isHypMode;
+        bool allowVAbort = !cpsr.a && hcr.amo && !isSecure && !isHypMode;
+
+        if ( !(intStatus || (hcr.vi && allowVIrq) || (hcr.vf && allowVFiq) ||
+               (hcr.va && allowVAbort)) )
             return NoFault;
 
-        CPSR cpsr = tc->readMiscReg(MISCREG_CPSR);
+        bool take_irq = takeInt(tc, INT_IRQ);
+        bool take_fiq = takeInt(tc, INT_FIQ);
+        bool take_ea = takeInt(tc, INT_ABT);
 
-        if (interrupts[INT_IRQ] && !cpsr.i)
+
+        if (interrupts[INT_IRQ] && take_irq)
             return new Interrupt;
-        if (interrupts[INT_FIQ] && !cpsr.f)
+        if ((interrupts[INT_VIRT_IRQ] || hcr.vi) && allowVIrq)
+            return new VirtualInterrupt;
+        if (interrupts[INT_FIQ] && take_fiq)
             return new FastInterrupt;
-        if (interrupts[INT_ABT] && !cpsr.a)
-            return new DataAbort(0, false, 0,
-                    ArmFault::AsynchronousExternalAbort);
+        if ((interrupts[INT_VIRT_FIQ] || hcr.vf) && allowVFiq)
+            return new VirtualFastInterrupt;
+        if (interrupts[INT_ABT] && take_ea)
+            return new SystemError;
+        if (hcr.va && allowVAbort)
+            return new VirtualDataAbort(0, TlbEntry::DomainType::NoAccess, false,
+                ArmFault::AsynchronousExternalAbort);
         if (interrupts[INT_RST])
-           return new Reset;
+            return new Reset;
         if (interrupts[INT_SEV])
-           return new ArmSev;
+            return new ArmSev;
         panic("intStatus and interrupts not in sync\n");
     }
 
diff --git a/src/arch/arm/intregs.hh b/src/arch/arm/intregs.hh
index 3fe00b765..fa18aa68d 100644
--- a/src/arch/arm/intregs.hh
+++ b/src/arch/arm/intregs.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
@@ -83,6 +83,9 @@ enum IntRegIndex
     INTREG_R14_MON,
     INTREG_LR_MON = INTREG_R14_MON,
 
+    INTREG_R13_HYP,
+    INTREG_SP_HYP = INTREG_R13_HYP,
+
     INTREG_R13_ABT,
     INTREG_SP_ABT = INTREG_R13_ABT,
     INTREG_R14_ABT,
@@ -108,7 +111,7 @@ enum IntRegIndex
     INTREG_R14_FIQ,
     INTREG_LR_FIQ = INTREG_R14_FIQ,
 
-    INTREG_ZERO,
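checkWfiWake() above encodes the rule that a masked-but-pending physical interrupt still terminates WFI, while virtual interrupts only count when HCR routes them and the core is neither in Hyp mode nor secure. A condensed restatement (illustrative helper, not the gem5 code):

    #include <cstdint>

    static bool
    wfiWakes(uint64_t pendingPhys, bool virtAsserted, bool virtRouted,
             bool inHyp, bool inSecure)
    {
        // PSTATE mask bits do not gate wake-up: any pending physical
        // interrupt wakes the core even if it would not be taken.
        bool virtWake = virtAsserted && virtRouted && !inHyp && !inSecure;
        return pendingPhys != 0 || virtWake;
    }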
// Dummy zero reg since there has to be one. + INTREG_ZERO, INTREG_UREG0, INTREG_UREG1, INTREG_UREG2, @@ -117,12 +120,54 @@ enum IntRegIndex INTREG_CONDCODES_V, INTREG_CONDCODES_GE, INTREG_FPCONDCODES, + INTREG_DUMMY, // Dummy reg used to throw away int reg results + + INTREG_SP0, + INTREG_SP1, + INTREG_SP2, + INTREG_SP3, NUM_INTREGS, - NUM_ARCH_INTREGS = INTREG_PC + 1, + NUM_ARCH_INTREGS = 32, + + /* AArch64 registers */ + INTREG_X0 = 0, + INTREG_X1, + INTREG_X2, + INTREG_X3, + INTREG_X4, + INTREG_X5, + INTREG_X6, + INTREG_X7, + INTREG_X8, + INTREG_X9, + INTREG_X10, + INTREG_X11, + INTREG_X12, + INTREG_X13, + INTREG_X14, + INTREG_X15, + INTREG_X16, + INTREG_X17, + INTREG_X18, + INTREG_X19, + INTREG_X20, + INTREG_X21, + INTREG_X22, + INTREG_X23, + INTREG_X24, + INTREG_X25, + INTREG_X26, + INTREG_X27, + INTREG_X28, + INTREG_X29, + INTREG_X30, + INTREG_X31, + + INTREG_SPX = NUM_INTREGS, /* All the aliased indexes. */ - + /* USR mode */ INTREG_R0_USR = INTREG_R0, INTREG_R1_USR = INTREG_R1, @@ -195,6 +240,25 @@ enum IntRegIndex INTREG_PC_ABT = INTREG_PC, INTREG_R15_ABT = INTREG_R15, + /* HYP mode */ + INTREG_R0_HYP = INTREG_R0, + INTREG_R1_HYP = INTREG_R1, + INTREG_R2_HYP = INTREG_R2, + INTREG_R3_HYP = INTREG_R3, + INTREG_R4_HYP = INTREG_R4, + INTREG_R5_HYP = INTREG_R5, + INTREG_R6_HYP = INTREG_R6, + INTREG_R7_HYP = INTREG_R7, + INTREG_R8_HYP = INTREG_R8, + INTREG_R9_HYP = INTREG_R9, + INTREG_R10_HYP = INTREG_R10, + INTREG_R11_HYP = INTREG_R11, + INTREG_R12_HYP = INTREG_R12, + INTREG_LR_HYP = INTREG_LR, + INTREG_R14_HYP = INTREG_R14, + INTREG_PC_HYP = INTREG_PC, + INTREG_R15_HYP = INTREG_R15, + /* UND mode */ INTREG_R0_UND = INTREG_R0, INTREG_R1_UND = INTREG_R1, @@ -244,11 +308,26 @@ enum IntRegIndex typedef IntRegIndex IntRegMap[NUM_ARCH_INTREGS]; +const IntRegMap IntReg64Map = { + INTREG_R0, INTREG_R1, INTREG_R2, INTREG_R3, + INTREG_R4, INTREG_R5, INTREG_R6, INTREG_R7, + INTREG_R8_USR, INTREG_R9_USR, INTREG_R10_USR, INTREG_R11_USR, + INTREG_R12_USR, INTREG_R13_USR, INTREG_R14_USR, INTREG_R13_HYP, + INTREG_R14_IRQ, INTREG_R13_IRQ, INTREG_R14_SVC, INTREG_R13_SVC, + INTREG_R14_ABT, INTREG_R13_ABT, INTREG_R14_UND, INTREG_R13_UND, + INTREG_R8_FIQ, INTREG_R9_FIQ, INTREG_R10_FIQ, INTREG_R11_FIQ, + INTREG_R12_FIQ, INTREG_R13_FIQ, INTREG_R14_FIQ, INTREG_ZERO +}; + const IntRegMap IntRegUsrMap = { INTREG_R0_USR, INTREG_R1_USR, INTREG_R2_USR, INTREG_R3_USR, INTREG_R4_USR, INTREG_R5_USR, INTREG_R6_USR, INTREG_R7_USR, INTREG_R8_USR, INTREG_R9_USR, INTREG_R10_USR, INTREG_R11_USR, - INTREG_R12_USR, INTREG_R13_USR, INTREG_R14_USR, INTREG_R15_USR + INTREG_R12_USR, INTREG_R13_USR, INTREG_R14_USR, INTREG_R15_USR, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -258,11 +337,33 @@ INTREG_USR(unsigned index) return IntRegUsrMap[index]; } +const IntRegMap IntRegHypMap = { + INTREG_R0_HYP, INTREG_R1_HYP, INTREG_R2_HYP, INTREG_R3_HYP, + INTREG_R4_HYP, INTREG_R5_HYP, INTREG_R6_HYP, INTREG_R7_HYP, + INTREG_R8_HYP, INTREG_R9_HYP, INTREG_R10_HYP, INTREG_R11_HYP, + INTREG_R12_HYP, INTREG_R13_HYP, INTREG_R14_HYP, INTREG_R15_HYP, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO +}; + +static inline IntRegIndex +INTREG_HYP(unsigned index) +{ + assert(index < 
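NUM_ARCH_INTREGS grows to 32 here so that one map shape covers both the AArch32 mode maps (16 real entries padded with INTREG_ZERO) and the 32 AArch64 X registers. A sketch of the lookup this enables, using the types defined in this file (hypothetical helper):

    // Reads of the padded slots 16..31 in an AArch32 mode map land on
    // INTREG_ZERO, which always reads as zero, so a single 32-entry map
    // type serves both ISAs.
    static IntRegIndex
    mapArchReg(const IntRegMap &map, unsigned archReg)
    {
        assert(archReg < NUM_ARCH_INTREGS);  // 32 after this change
        return map[archReg];
    }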
NUM_ARCH_INTREGS); + return IntRegHypMap[index]; +} + const IntRegMap IntRegSvcMap = { INTREG_R0_SVC, INTREG_R1_SVC, INTREG_R2_SVC, INTREG_R3_SVC, INTREG_R4_SVC, INTREG_R5_SVC, INTREG_R6_SVC, INTREG_R7_SVC, INTREG_R8_SVC, INTREG_R9_SVC, INTREG_R10_SVC, INTREG_R11_SVC, - INTREG_R12_SVC, INTREG_R13_SVC, INTREG_R14_SVC, INTREG_R15_SVC + INTREG_R12_SVC, INTREG_R13_SVC, INTREG_R14_SVC, INTREG_R15_SVC, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -276,7 +377,11 @@ const IntRegMap IntRegMonMap = { INTREG_R0_MON, INTREG_R1_MON, INTREG_R2_MON, INTREG_R3_MON, INTREG_R4_MON, INTREG_R5_MON, INTREG_R6_MON, INTREG_R7_MON, INTREG_R8_MON, INTREG_R9_MON, INTREG_R10_MON, INTREG_R11_MON, - INTREG_R12_MON, INTREG_R13_MON, INTREG_R14_MON, INTREG_R15_MON + INTREG_R12_MON, INTREG_R13_MON, INTREG_R14_MON, INTREG_R15_MON, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -290,7 +395,11 @@ const IntRegMap IntRegAbtMap = { INTREG_R0_ABT, INTREG_R1_ABT, INTREG_R2_ABT, INTREG_R3_ABT, INTREG_R4_ABT, INTREG_R5_ABT, INTREG_R6_ABT, INTREG_R7_ABT, INTREG_R8_ABT, INTREG_R9_ABT, INTREG_R10_ABT, INTREG_R11_ABT, - INTREG_R12_ABT, INTREG_R13_ABT, INTREG_R14_ABT, INTREG_R15_ABT + INTREG_R12_ABT, INTREG_R13_ABT, INTREG_R14_ABT, INTREG_R15_ABT, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -304,7 +413,11 @@ const IntRegMap IntRegUndMap = { INTREG_R0_UND, INTREG_R1_UND, INTREG_R2_UND, INTREG_R3_UND, INTREG_R4_UND, INTREG_R5_UND, INTREG_R6_UND, INTREG_R7_UND, INTREG_R8_UND, INTREG_R9_UND, INTREG_R10_UND, INTREG_R11_UND, - INTREG_R12_UND, INTREG_R13_UND, INTREG_R14_UND, INTREG_R15_UND + INTREG_R12_UND, INTREG_R13_UND, INTREG_R14_UND, INTREG_R15_UND, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -318,7 +431,11 @@ const IntRegMap IntRegIrqMap = { INTREG_R0_IRQ, INTREG_R1_IRQ, INTREG_R2_IRQ, INTREG_R3_IRQ, INTREG_R4_IRQ, INTREG_R5_IRQ, INTREG_R6_IRQ, INTREG_R7_IRQ, INTREG_R8_IRQ, INTREG_R9_IRQ, INTREG_R10_IRQ, INTREG_R11_IRQ, - INTREG_R12_IRQ, INTREG_R13_IRQ, INTREG_R14_IRQ, INTREG_R15_IRQ + INTREG_R12_IRQ, INTREG_R13_IRQ, INTREG_R14_IRQ, INTREG_R15_IRQ, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO }; static inline IntRegIndex @@ -332,7 +449,11 @@ const IntRegMap IntRegFiqMap = { INTREG_R0_FIQ, INTREG_R1_FIQ, INTREG_R2_FIQ, INTREG_R3_FIQ, INTREG_R4_FIQ, INTREG_R5_FIQ, INTREG_R6_FIQ, INTREG_R7_FIQ, INTREG_R8_FIQ, INTREG_R9_FIQ, INTREG_R10_FIQ, INTREG_R11_FIQ, - INTREG_R12_FIQ, INTREG_R13_FIQ, INTREG_R14_FIQ, INTREG_R15_FIQ + INTREG_R12_FIQ, INTREG_R13_FIQ, INTREG_R14_FIQ, INTREG_R15_FIQ, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, + INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, 
INTREG_ZERO,
+    INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO,
+    INTREG_ZERO, INTREG_ZERO, INTREG_ZERO, INTREG_ZERO
 };
 
 static inline IntRegIndex
@@ -351,6 +472,51 @@ intRegInMode(OperatingMode mode, int reg)
     return mode * intRegsPerMode + reg;
 }
 
+static inline int
+flattenIntRegModeIndex(int reg)
+{
+    int mode = reg / intRegsPerMode;
+    reg = reg % intRegsPerMode;
+    switch (mode) {
+      case MODE_USER:
+      case MODE_SYSTEM:
+        return INTREG_USR(reg);
+      case MODE_FIQ:
+        return INTREG_FIQ(reg);
+      case MODE_IRQ:
+        return INTREG_IRQ(reg);
+      case MODE_SVC:
+        return INTREG_SVC(reg);
+      case MODE_MON:
+        return INTREG_MON(reg);
+      case MODE_ABORT:
+        return INTREG_ABT(reg);
+      case MODE_HYP:
+        return INTREG_HYP(reg);
+      case MODE_UNDEFINED:
+        return INTREG_UND(reg);
+      default:
+        panic("%d: Flattening into an unknown mode: reg:%#x mode:%#x\n",
+              curTick(), reg, mode);
+    }
+}
+
+
+static inline IntRegIndex
+makeSP(IntRegIndex reg)
+{
+    if (reg == INTREG_X31)
+        reg = INTREG_SPX;
+    return reg;
+}
+
+
+static inline bool
+isSP(IntRegIndex reg)
+{
+    return reg == INTREG_SPX;
+}
+
 }
 
 #endif
diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 86be2803d..4f1ef91ec 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -51,12 +51,111 @@
 namespace ArmISA
 {
+
+/**
+ * Some registers alias with others, and therefore need to be translated.
+ * For each entry:
+ * the first value is the misc register that is to be looked up,
+ * the second value is the lower part of the translation,
+ * and the third is the upper part.
+ */
+const struct ISA::MiscRegInitializerEntry
+    ISA::MiscRegSwitch[miscRegTranslateMax] = {
+    {MISCREG_CSSELR_EL1, {MISCREG_CSSELR, 0}},
+    {MISCREG_SCTLR_EL1, {MISCREG_SCTLR, 0}},
+    {MISCREG_SCTLR_EL2, {MISCREG_HSCTLR, 0}},
+    {MISCREG_ACTLR_EL1, {MISCREG_ACTLR, 0}},
+    {MISCREG_ACTLR_EL2, {MISCREG_HACTLR, 0}},
+    {MISCREG_CPACR_EL1, {MISCREG_CPACR, 0}},
+    {MISCREG_CPTR_EL2, {MISCREG_HCPTR, 0}},
+    {MISCREG_HCR_EL2, {MISCREG_HCR, 0}},
+    {MISCREG_MDCR_EL2, {MISCREG_HDCR, 0}},
+    {MISCREG_HSTR_EL2, {MISCREG_HSTR, 0}},
+    {MISCREG_HACR_EL2, {MISCREG_HACR, 0}},
+    {MISCREG_TTBR0_EL1, {MISCREG_TTBR0, 0}},
+    {MISCREG_TTBR1_EL1, {MISCREG_TTBR1, 0}},
+    {MISCREG_TTBR0_EL2, {MISCREG_HTTBR, 0}},
+    {MISCREG_VTTBR_EL2, {MISCREG_VTTBR, 0}},
+    {MISCREG_TCR_EL1, {MISCREG_TTBCR, 0}},
+    {MISCREG_TCR_EL2, {MISCREG_HTCR, 0}},
+    {MISCREG_VTCR_EL2, {MISCREG_VTCR, 0}},
+    {MISCREG_AFSR0_EL1, {MISCREG_ADFSR, 0}},
+    {MISCREG_AFSR1_EL1, {MISCREG_AIFSR, 0}},
+    {MISCREG_AFSR0_EL2, {MISCREG_HADFSR, 0}},
+    {MISCREG_AFSR1_EL2, {MISCREG_HAIFSR, 0}},
+    {MISCREG_ESR_EL2, {MISCREG_HSR, 0}},
+    {MISCREG_FAR_EL1, {MISCREG_DFAR, MISCREG_IFAR}},
+    {MISCREG_FAR_EL2, {MISCREG_HDFAR, MISCREG_HIFAR}},
+    {MISCREG_HPFAR_EL2, {MISCREG_HPFAR, 0}},
+    {MISCREG_PAR_EL1, {MISCREG_PAR, 0}},
+    {MISCREG_MAIR_EL1, {MISCREG_PRRR, MISCREG_NMRR}},
+    {MISCREG_MAIR_EL2, {MISCREG_HMAIR0, MISCREG_HMAIR1}},
+    {MISCREG_AMAIR_EL1, {MISCREG_AMAIR0, MISCREG_AMAIR1}},
+    {MISCREG_VBAR_EL1, {MISCREG_VBAR, 0}},
+    {MISCREG_VBAR_EL2, {MISCREG_HVBAR, 0}},
+    {MISCREG_CONTEXTIDR_EL1, {MISCREG_CONTEXTIDR, 0}},
+    {MISCREG_TPIDR_EL0, {MISCREG_TPIDRURW, 0}},
+    {MISCREG_TPIDRRO_EL0, {MISCREG_TPIDRURO, 0}},
+    {MISCREG_TPIDR_EL1, {MISCREG_TPIDRPRW, 0}},
+    {MISCREG_TPIDR_EL2, {MISCREG_HTPIDR, 0}},
+    {MISCREG_TEECR32_EL1, {MISCREG_TEECR, 0}},
+    {MISCREG_CNTFRQ_EL0, {MISCREG_CNTFRQ, 0}},
+    {MISCREG_CNTPCT_EL0, {MISCREG_CNTPCT, 0}},
+    {MISCREG_CNTVCT_EL0, {MISCREG_CNTVCT, 0}},
+    {MISCREG_CNTVOFF_EL2, {MISCREG_CNTVOFF, 0}},
+    {MISCREG_CNTKCTL_EL1, {MISCREG_CNTKCTL, 0}},
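makeSP() and isSP() above capture the A64 rule that register number 31 encodes either the stack pointer or the zero register depending on the operand position. A sketch of how a decoder can apply them (hypothetical decode snippet using the functions just defined):

    // For loads/stores the base register Rn == 31 means SP; for most
    // data-processing operands X31 reads as XZR. makeSP() is applied only
    // where the SP interpretation is architecturally correct.
    static IntRegIndex
    decodeBaseReg(unsigned rn)
    {
        return makeSP((IntRegIndex) rn);  // rn == 31 -> INTREG_SPX
    }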
+    {MISCREG_CNTHCTL_EL2, {MISCREG_CNTHCTL, 0}},
+    {MISCREG_CNTP_TVAL_EL0, {MISCREG_CNTP_TVAL, 0}},
+    {MISCREG_CNTP_CTL_EL0, {MISCREG_CNTP_CTL, 0}},
+    {MISCREG_CNTP_CVAL_EL0, {MISCREG_CNTP_CVAL, 0}},
+    {MISCREG_CNTV_TVAL_EL0, {MISCREG_CNTV_TVAL, 0}},
+    {MISCREG_CNTV_CTL_EL0, {MISCREG_CNTV_CTL, 0}},
+    {MISCREG_CNTV_CVAL_EL0, {MISCREG_CNTV_CVAL, 0}},
+    {MISCREG_CNTHP_TVAL_EL2, {MISCREG_CNTHP_TVAL, 0}},
+    {MISCREG_CNTHP_CTL_EL2, {MISCREG_CNTHP_CTL, 0}},
+    {MISCREG_CNTHP_CVAL_EL2, {MISCREG_CNTHP_CVAL, 0}},
+    {MISCREG_DACR32_EL2, {MISCREG_DACR, 0}},
+    {MISCREG_IFSR32_EL2, {MISCREG_IFSR, 0}},
+    {MISCREG_TEEHBR32_EL1, {MISCREG_TEEHBR, 0}},
+    {MISCREG_SDER32_EL3, {MISCREG_SDER, 0}}
+};
+
+
 ISA::ISA(Params *p)
-    : SimObject(p)
+    : SimObject(p), system(NULL), lookUpMiscReg(NUM_MISCREGS, {0,0})
 {
     SCTLR sctlr;
     sctlr = 0;
     miscRegs[MISCREG_SCTLR_RST] = sctlr;
+
+    system = dynamic_cast<ArmSystem *>(p->system);
+    DPRINTFN("ISA system set to: %p %p\n", system, p->system);
+
+    // Cache system-level properties
+    if (FullSystem && system) {
+        haveSecurity = system->haveSecurity();
+        haveLPAE = system->haveLPAE();
+        haveVirtualization = system->haveVirtualization();
+        haveLargeAsid64 = system->haveLargeAsid64();
+        physAddrRange64 = system->physAddrRange64();
+    } else {
+        haveSecurity = haveLPAE = haveVirtualization = false;
+        haveLargeAsid64 = false;
+        physAddrRange64 = 32;  // dummy value
+    }
+
+    /** Fill in the miscReg translation table */
+    for (uint32_t i = 0; i < miscRegTranslateMax; i++) {
+        struct MiscRegLUTEntry new_entry;
+
+        uint32_t select = MiscRegSwitch[i].index;
+        new_entry = MiscRegSwitch[i].entry;
+
+        lookUpMiscReg[select] = new_entry;
+    }
+
+    preUnflattenMiscReg();
+
     clear();
 }
 
@@ -73,27 +172,42 @@ ISA::clear()
     SCTLR sctlr_rst = miscRegs[MISCREG_SCTLR_RST];
 
     memset(miscRegs, 0, sizeof(miscRegs));
+
+    // Initialize configurable default values
+    miscRegs[MISCREG_MIDR] = p->midr;
+    miscRegs[MISCREG_MIDR_EL1] = p->midr;
+    miscRegs[MISCREG_VPIDR] = p->midr;
+
+    if (FullSystem && system->highestELIs64()) {
+        // Initialize AArch64 state
+        clear64(p);
+        return;
+    }
+
+    // Initialize AArch32 state...
+
     CPSR cpsr = 0;
     cpsr.mode = MODE_USER;
     miscRegs[MISCREG_CPSR] = cpsr;
     updateRegMap(cpsr);
 
     SCTLR sctlr = 0;
-    sctlr.te = (bool)sctlr_rst.te;
-    sctlr.nmfi = (bool)sctlr_rst.nmfi;
-    sctlr.v = (bool)sctlr_rst.v;
-    sctlr.u = 1;
+    sctlr.te = (bool) sctlr_rst.te;
+    sctlr.nmfi = (bool) sctlr_rst.nmfi;
+    sctlr.v = (bool) sctlr_rst.v;
+    sctlr.u = 1;
     sctlr.xp = 1;
     sctlr.rao2 = 1;
     sctlr.rao3 = 1;
-    sctlr.rao4 = 1;
-    miscRegs[MISCREG_SCTLR] = sctlr;
+    sctlr.rao4 = 0xf;  // SCTLR[6:3]
+    miscRegs[MISCREG_SCTLR_NS] = sctlr;
     miscRegs[MISCREG_SCTLR_RST] = sctlr_rst;
+    miscRegs[MISCREG_HCPTR] = 0;
 
-    /* Start with an event in the mailbox */
+    // Start with an event in the mailbox
     miscRegs[MISCREG_SEV_MAILBOX] = 1;
 
-    // Separate Instruction and Data TLBs.
+    // Separate Instruction and Data TLBs
     miscRegs[MISCREG_TLBTR] = 1;
 
     MVFR0 mvfr0 = 0;
@@ -119,7 +233,8 @@ ISA::clear()
 
     // Reset values of PRRR and NMRR are implementation dependent
-    miscRegs[MISCREG_PRRR] =
+    // @todo: PRRR and NMRR in secure state?
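The net effect of the translation table filled in above is that a 64-bit AArch64 system register can be backed by one or two 32-bit AArch32 registers (lower and upper halves). A sketch of the compose rule the read path below implements (illustrative helper, not gem5 code):

    #include <cstdint>

    static uint64_t
    composeBanked(uint32_t lower, uint32_t upper)
    {
        // e.g. MAIR_EL1 is PRRR in bits [31:0] and NMRR in bits [63:32]
        return ((uint64_t) upper << 32) | lower;
    }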
+ miscRegs[MISCREG_PRRR_NS] = (1 << 19) | // 19 (0 << 18) | // 18 (0 << 17) | // 17 @@ -132,7 +247,7 @@ ISA::clear() (2 << 4) | // 5:4 (1 << 2) | // 3:2 0; // 1:0 - miscRegs[MISCREG_NMRR] = + miscRegs[MISCREG_NMRR_NS] = (1 << 30) | // 31:30 (0 << 26) | // 27:26 (0 << 24) | // 25:24 @@ -151,8 +266,6 @@ ISA::clear() miscRegs[MISCREG_CPACR] = 0; - // Initialize configurable default values - miscRegs[MISCREG_MIDR] = p->midr; miscRegs[MISCREG_ID_PFR0] = p->id_pfr0; miscRegs[MISCREG_ID_PFR1] = p->id_pfr1; @@ -169,27 +282,132 @@ ISA::clear() miscRegs[MISCREG_ID_ISAR4] = p->id_isar4; miscRegs[MISCREG_ID_ISAR5] = p->id_isar5; - miscRegs[MISCREG_FPSID] = p->fpsid; + if (haveLPAE) { + TTBCR ttbcr = miscRegs[MISCREG_TTBCR_NS]; + ttbcr.eae = 0; + miscRegs[MISCREG_TTBCR_NS] = ttbcr; + // Enforce consistency with system-level settings + miscRegs[MISCREG_ID_MMFR0] = (miscRegs[MISCREG_ID_MMFR0] & ~0xf) | 0x5; + } + + if (haveSecurity) { + miscRegs[MISCREG_SCTLR_S] = sctlr; + miscRegs[MISCREG_SCR] = 0; + miscRegs[MISCREG_VBAR_S] = 0; + } else { + // we're always non-secure + miscRegs[MISCREG_SCR] = 1; + } //XXX We need to initialize the rest of the state. } +void +ISA::clear64(const ArmISAParams *p) +{ + CPSR cpsr = 0; + Addr rvbar = system->resetAddr64(); + switch (system->highestEL()) { + // Set initial EL to highest implemented EL using associated stack + // pointer (SP_ELx); set RVBAR_ELx to implementation defined reset + // value + case EL3: + cpsr.mode = MODE_EL3H; + miscRegs[MISCREG_RVBAR_EL3] = rvbar; + break; + case EL2: + cpsr.mode = MODE_EL2H; + miscRegs[MISCREG_RVBAR_EL2] = rvbar; + break; + case EL1: + cpsr.mode = MODE_EL1H; + miscRegs[MISCREG_RVBAR_EL1] = rvbar; + break; + default: + panic("Invalid highest implemented exception level"); + break; + } + + // Initialize rest of CPSR + cpsr.daif = 0xf; // Mask all interrupts + cpsr.ss = 0; + cpsr.il = 0; + miscRegs[MISCREG_CPSR] = cpsr; + updateRegMap(cpsr); + + // Initialize other control registers + miscRegs[MISCREG_MPIDR_EL1] = 0x80000000; + if (haveSecurity) { + miscRegs[MISCREG_SCTLR_EL3] = 0x30c50870; + miscRegs[MISCREG_SCR_EL3] = 0x00000030; // RES1 fields + // @todo: uncomment this to enable Virtualization + // } else if (haveVirtualization) { + // miscRegs[MISCREG_SCTLR_EL2] = 0x30c50870; + } else { + miscRegs[MISCREG_SCTLR_EL1] = 0x30c50870; + // Always non-secure + miscRegs[MISCREG_SCR_EL3] = 1; + } + + // Initialize configurable id registers + miscRegs[MISCREG_ID_AA64AFR0_EL1] = p->id_aa64afr0_el1; + miscRegs[MISCREG_ID_AA64AFR1_EL1] = p->id_aa64afr1_el1; + miscRegs[MISCREG_ID_AA64DFR0_EL1] = p->id_aa64dfr0_el1; + miscRegs[MISCREG_ID_AA64DFR1_EL1] = p->id_aa64dfr1_el1; + miscRegs[MISCREG_ID_AA64ISAR0_EL1] = p->id_aa64isar0_el1; + miscRegs[MISCREG_ID_AA64ISAR1_EL1] = p->id_aa64isar1_el1; + miscRegs[MISCREG_ID_AA64MMFR0_EL1] = p->id_aa64mmfr0_el1; + miscRegs[MISCREG_ID_AA64MMFR1_EL1] = p->id_aa64mmfr1_el1; + miscRegs[MISCREG_ID_AA64PFR0_EL1] = p->id_aa64pfr0_el1; + miscRegs[MISCREG_ID_AA64PFR1_EL1] = p->id_aa64pfr1_el1; + + // Enforce consistency with system-level settings... + + // EL3 + // (no AArch32/64 interprocessing support for now) + miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64PFR0_EL1], 15, 12, + haveSecurity ? 0x1 : 0x0); + // EL2 + // (no AArch32/64 interprocessing support for now) + miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64PFR0_EL1], 11, 8, + haveVirtualization ? 
0x1 : 0x0); + // Large ASID support + miscRegs[MISCREG_ID_AA64MMFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64MMFR0_EL1], 7, 4, + haveLargeAsid64 ? 0x2 : 0x0); + // Physical address size + miscRegs[MISCREG_ID_AA64MMFR0_EL1] = insertBits( + miscRegs[MISCREG_ID_AA64MMFR0_EL1], 3, 0, + encodePhysAddrRange64(physAddrRange64)); +} + MiscReg ISA::readMiscRegNoEffect(int misc_reg) const { assert(misc_reg < NumMiscRegs); - int flat_idx; - if (misc_reg == MISCREG_SPSR) - flat_idx = flattenMiscIndex(misc_reg); - else - flat_idx = misc_reg; - MiscReg val = miscRegs[flat_idx]; + int flat_idx = flattenMiscIndex(misc_reg); // Note: indexes of AArch64 + // registers are left unchanged + MiscReg val; + + if (lookUpMiscReg[flat_idx].lower == 0 || flat_idx == MISCREG_SPSR + || flat_idx == MISCREG_SCTLR_EL1) { + if (flat_idx == MISCREG_SPSR) + flat_idx = flattenMiscIndex(MISCREG_SPSR); + if (flat_idx == MISCREG_SCTLR_EL1) + flat_idx = flattenMiscIndex(MISCREG_SCTLR); + val = miscRegs[flat_idx]; + } else + if (lookUpMiscReg[flat_idx].upper > 0) + val = ((miscRegs[lookUpMiscReg[flat_idx].lower] & mask(32)) + | (miscRegs[lookUpMiscReg[flat_idx].upper] << 32)); + else + val = miscRegs[lookUpMiscReg[flat_idx].lower]; - DPRINTF(MiscRegs, "Reading From misc reg %d (%d) : %#x\n", - misc_reg, flat_idx, val); return val; } @@ -197,33 +415,98 @@ ISA::readMiscRegNoEffect(int misc_reg) const MiscReg ISA::readMiscReg(int misc_reg, ThreadContext *tc) { - ArmSystem *arm_sys; + CPSR cpsr = 0; + PCState pc = 0; + SCR scr = 0; if (misc_reg == MISCREG_CPSR) { - CPSR cpsr = miscRegs[misc_reg]; - PCState pc = tc->pcState(); + cpsr = miscRegs[misc_reg]; + pc = tc->pcState(); cpsr.j = pc.jazelle() ? 1 : 0; cpsr.t = pc.thumb() ? 1 : 0; return cpsr; } - if (misc_reg >= MISCREG_CP15_UNIMP_START) - panic("Unimplemented CP15 register %s read.\n", - miscRegName[misc_reg]); - switch (misc_reg) { +#ifndef NDEBUG + if (!miscRegInfo[misc_reg][MISCREG_IMPLEMENTED]) { + if (miscRegInfo[misc_reg][MISCREG_WARN_NOT_FAIL]) + warn("Unimplemented system register %s read.\n", + miscRegName[misc_reg]); + else + panic("Unimplemented system register %s read.\n", + miscRegName[misc_reg]); + } +#endif + + switch (unflattenMiscReg(misc_reg)) { + case MISCREG_HCR: + { + if (!haveVirtualization) + return 0; + else + return readMiscRegNoEffect(MISCREG_HCR); + } + case MISCREG_CPACR: + { + const uint32_t ones = (uint32_t)(-1); + CPACR cpacrMask = 0; + // Only cp10, cp11, and ase are implemented, nothing else should + // be readable? 
(straight copy from the write code) + cpacrMask.cp10 = ones; + cpacrMask.cp11 = ones; + cpacrMask.asedis = ones; + + // Security Extensions may limit the readability of CPACR + if (haveSecurity) { + scr = readMiscRegNoEffect(MISCREG_SCR); + cpsr = readMiscRegNoEffect(MISCREG_CPSR); + if (scr.ns && (cpsr.mode != MODE_MON)) { + NSACR nsacr = readMiscRegNoEffect(MISCREG_NSACR); + // NB: Skipping the full loop, here + if (!nsacr.cp10) cpacrMask.cp10 = 0; + if (!nsacr.cp11) cpacrMask.cp11 = 0; + } + } + MiscReg val = readMiscRegNoEffect(MISCREG_CPACR); + val &= cpacrMask; + DPRINTF(MiscRegs, "Reading misc reg %s: %#x\n", + miscRegName[misc_reg], val); + return val; + } case MISCREG_MPIDR: - arm_sys = dynamic_cast(tc->getSystemPtr()); - assert(arm_sys); - - if (arm_sys->multiProc) { - return 0x80000000 | // multiprocessor extensions available - tc->cpuId(); + cpsr = readMiscRegNoEffect(MISCREG_CPSR); + scr = readMiscRegNoEffect(MISCREG_SCR); + if ((cpsr.mode == MODE_HYP) || inSecureState(scr, cpsr)) { + return getMPIDR(system, tc); } else { - return 0x80000000 | // multiprocessor extensions available - 0x40000000 | // in up system - tc->cpuId(); + return readMiscReg(MISCREG_VMPIDR, tc); + } + break; + case MISCREG_MPIDR_EL1: + // @todo in the absence of v8 virtualization support just return MPIDR_EL1 + return getMPIDR(system, tc) & 0xffffffff; + case MISCREG_VMPIDR: + // top bit defined as RES1 + return readMiscRegNoEffect(misc_reg) | 0x80000000; + case MISCREG_ID_AFR0: // not implemented, so alias MIDR + case MISCREG_ID_DFR0: // not implemented, so alias MIDR + case MISCREG_REVIDR: // not implemented, so alias MIDR + case MISCREG_MIDR: + cpsr = readMiscRegNoEffect(MISCREG_CPSR); + scr = readMiscRegNoEffect(MISCREG_SCR); + if ((cpsr.mode == MODE_HYP) || inSecureState(scr, cpsr)) { + return readMiscRegNoEffect(misc_reg); + } else { + return readMiscRegNoEffect(MISCREG_VPIDR); } break; + case MISCREG_JOSCR: // Jazelle trivial implementation, RAZ/WI + case MISCREG_JMCR: // Jazelle trivial implementation, RAZ/WI + case MISCREG_JIDR: // Jazelle trivial implementation, RAZ/WI + case MISCREG_AIDR: // AUX ID set to 0 + case MISCREG_TCMTR: // No TCM's + return 0; + case MISCREG_CLIDR: warn_once("The clidr register always reports 0 caches.\n"); warn_once("clidr LoUIS field of 0b001 to match current " @@ -276,6 +559,75 @@ ISA::readMiscReg(int misc_reg, ThreadContext *tc) return readMiscRegNoEffect(MISCREG_FPSCR) & ~FpscrQcMask; case MISCREG_FPSCR_EXC: return readMiscRegNoEffect(MISCREG_FPSCR) & ~FpscrExcMask; + case MISCREG_FPSR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioc = ones; + fpscrMask.dzc = ones; + fpscrMask.ofc = ones; + fpscrMask.ufc = ones; + fpscrMask.ixc = ones; + fpscrMask.idc = ones; + fpscrMask.qc = ones; + fpscrMask.v = ones; + fpscrMask.c = ones; + fpscrMask.z = ones; + fpscrMask.n = ones; + return readMiscRegNoEffect(MISCREG_FPSCR) & (uint32_t)fpscrMask; + } + case MISCREG_FPCR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioe = ones; + fpscrMask.dze = ones; + fpscrMask.ofe = ones; + fpscrMask.ufe = ones; + fpscrMask.ixe = ones; + fpscrMask.ide = ones; + fpscrMask.len = ones; + fpscrMask.stride = ones; + fpscrMask.rMode = ones; + fpscrMask.fz = ones; + fpscrMask.dn = ones; + fpscrMask.ahp = ones; + return readMiscRegNoEffect(MISCREG_FPSCR) & (uint32_t)fpscrMask; + } + case MISCREG_NZCV: + { + CPSR cpsr = 0; + cpsr.nz = tc->readIntReg(INTREG_CONDCODES_NZ); + cpsr.c = tc->readIntReg(INTREG_CONDCODES_C); + cpsr.v = 
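FPSR and FPCR above are two masked views of the single underlying FPSCR: FPSR exposes the status bits (cumulative exception flags, QC, NZCV), FPCR the control bits (trap enables, Len/Stride, rounding mode, FZ/DN/AHP). A sketch of the split; the mask constants are illustrative approximations, not the exact gem5 bit layout:

    #include <cstdint>

    // Hypothetical masks: status flags in one view, control in the other.
    static const uint32_t FPSR_VIEW = 0xf800009f;  // N,Z,C,V,QC + IDC,IXC..IOC
    static const uint32_t FPCR_VIEW = 0x07f79f00;  // AHP,DN,FZ,RMode,Stride,Len + enables

    static uint32_t readFpsr(uint32_t fpscr) { return fpscr & FPSR_VIEW; }
    static uint32_t readFpcr(uint32_t fpscr) { return fpscr & FPCR_VIEW; }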
tc->readIntReg(INTREG_CONDCODES_V); + return cpsr; + } + case MISCREG_DAIF: + { + CPSR cpsr = 0; + cpsr.daif = (uint8_t) ((CPSR) miscRegs[MISCREG_CPSR]).daif; + return cpsr; + } + case MISCREG_SP_EL0: + { + return tc->readIntReg(INTREG_SP0); + } + case MISCREG_SP_EL1: + { + return tc->readIntReg(INTREG_SP1); + } + case MISCREG_SP_EL2: + { + return tc->readIntReg(INTREG_SP2); + } + case MISCREG_SPSEL: + { + return miscRegs[MISCREG_CPSR] & 0x1; + } + case MISCREG_CURRENTEL: + { + return miscRegs[MISCREG_CPSR] & 0xc; + } case MISCREG_L2CTLR: { // mostly unimplemented, just set NumCPUs field from sim and return @@ -289,8 +641,120 @@ ISA::readMiscReg(int misc_reg, ThreadContext *tc) * Return 0 as we don't support debug architecture yet. */ return 0; - case MISCREG_DBGDSCR_INT: + case MISCREG_DBGDSCRint: return 0; + case MISCREG_ISR: + return tc->getCpuPtr()->getInterruptController()->getISR( + readMiscRegNoEffect(MISCREG_HCR), + readMiscRegNoEffect(MISCREG_CPSR), + readMiscRegNoEffect(MISCREG_SCR)); + case MISCREG_ISR_EL1: + return tc->getCpuPtr()->getInterruptController()->getISR( + readMiscRegNoEffect(MISCREG_HCR_EL2), + readMiscRegNoEffect(MISCREG_CPSR), + readMiscRegNoEffect(MISCREG_SCR_EL3)); + case MISCREG_DCZID_EL0: + return 0x04; // DC ZVA clear 64-byte chunks + case MISCREG_HCPTR: + { + MiscReg val = readMiscRegNoEffect(misc_reg); + // The trap bit associated with CP14 is defined as RAZ + val &= ~(1 << 14); + // If a CP bit in NSACR is 0 then the corresponding bit in + // HCPTR is RAO/WI + bool secure_lookup = haveSecurity && + inSecureState(readMiscRegNoEffect(MISCREG_SCR), + readMiscRegNoEffect(MISCREG_CPSR)); + if (!secure_lookup) { + MiscReg mask = readMiscRegNoEffect(MISCREG_NSACR); + val |= (mask ^ 0x7FFF) & 0xBFFF; + } + // Set the bits for unimplemented coprocessors to RAO/WI + val |= 0x33FF; + return (val); + } + case MISCREG_HDFAR: // alias for secure DFAR + return readMiscRegNoEffect(MISCREG_DFAR_S); + case MISCREG_HIFAR: // alias for secure IFAR + return readMiscRegNoEffect(MISCREG_IFAR_S); + case MISCREG_HVBAR: // bottom bits reserved + return readMiscRegNoEffect(MISCREG_HVBAR) & 0xFFFFFFE0; + case MISCREG_SCTLR: // Some bits hardwired + // The FI field (bit 21) is common between S/NS versions of the register + return (readMiscRegNoEffect(MISCREG_SCTLR_S) & (1 << 21)) | + (readMiscRegNoEffect(misc_reg) & 0x72DD39FF) | 0x00C00818; // V8 SCTLR + case MISCREG_SCTLR_EL1: + // The FI field (bit 21) is common between S/NS versions of the register + return (readMiscRegNoEffect(MISCREG_SCTLR_S) & (1 << 21)) | + (readMiscRegNoEffect(misc_reg) & 0x37DDDBFF) | 0x30D00800; // V8 SCTLR_EL1 + case MISCREG_SCTLR_EL3: + // The FI field (bit 21) is common between S/NS versions of the register + return (readMiscRegNoEffect(MISCREG_SCTLR_S) & (1 << 21)) | + (readMiscRegNoEffect(misc_reg) & 0x32CD183F) | 0x30C50830; // V8 SCTLR_EL3 + case MISCREG_HSCTLR: // FI comes from SCTLR + { + uint32_t mask = 1 << 27; + return (readMiscRegNoEffect(MISCREG_HSCTLR) & ~mask) | + (readMiscRegNoEffect(MISCREG_SCTLR) & mask); + } + case MISCREG_SCR: + { + CPSR cpsr = readMiscRegNoEffect(MISCREG_CPSR); + if (cpsr.width) { + return readMiscRegNoEffect(MISCREG_SCR); + } else { + return readMiscRegNoEffect(MISCREG_SCR_EL3); + } + } + // Generic Timer registers + case MISCREG_CNTFRQ: + case MISCREG_CNTFRQ_EL0: + inform_once("Read CNTFREQ_EL0 frequency\n"); + return getSystemCounter(tc)->freq(); + case MISCREG_CNTPCT: + case MISCREG_CNTPCT_EL0: + return getSystemCounter(tc)->value(); + case MISCREG_CNTVCT: + return 
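The generic timer reads that follow expose the system counter: CNTFRQ_EL0 gives the counter frequency and CNTPCT_EL0 the current count, which is how guest software converts ticks to wall-clock time. A sketch of the usual conversion (illustrative):

    #include <cstdint>

    static uint64_t
    ticksToNs(uint64_t cntpct, uint64_t cntfrq)
    {
        // Divide first to avoid 64-bit overflow on large counts, then add
        // back the contribution of the remainder.
        return (cntpct / cntfrq) * 1000000000ull +
               (cntpct % cntfrq) * 1000000000ull / cntfrq;
    }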
getSystemCounter(tc)->value(); + case MISCREG_CNTVCT_EL0: + return getSystemCounter(tc)->value(); + case MISCREG_CNTP_CVAL: + case MISCREG_CNTP_CVAL_EL0: + return getArchTimer(tc, tc->cpuId())->compareValue(); + case MISCREG_CNTP_TVAL: + case MISCREG_CNTP_TVAL_EL0: + return getArchTimer(tc, tc->cpuId())->timerValue(); + case MISCREG_CNTP_CTL: + case MISCREG_CNTP_CTL_EL0: + return getArchTimer(tc, tc->cpuId())->control(); + // PL1 phys. timer, secure + // AArch64 + case MISCREG_CNTPS_CVAL_EL1: + case MISCREG_CNTPS_TVAL_EL1: + case MISCREG_CNTPS_CTL_EL1: + // PL2 phys. timer, non-secure + // AArch32 + case MISCREG_CNTHCTL: + case MISCREG_CNTHP_CVAL: + case MISCREG_CNTHP_TVAL: + case MISCREG_CNTHP_CTL: + // AArch64 + case MISCREG_CNTHCTL_EL2: + case MISCREG_CNTHP_CVAL_EL2: + case MISCREG_CNTHP_TVAL_EL2: + case MISCREG_CNTHP_CTL_EL2: + // Virtual timer + // AArch32 + case MISCREG_CNTV_CVAL: + case MISCREG_CNTV_TVAL: + case MISCREG_CNTV_CTL: + // AArch64 + // case MISCREG_CNTV_CVAL_EL2: + // case MISCREG_CNTV_TVAL_EL2: + // case MISCREG_CNTV_CTL_EL2: + panic("Generic Timer register not implemented\n"); + break; + } return readMiscRegNoEffect(misc_reg); } @@ -300,15 +764,28 @@ ISA::setMiscRegNoEffect(int misc_reg, const MiscReg &val) { assert(misc_reg < NumMiscRegs); - int flat_idx; - if (misc_reg == MISCREG_SPSR) - flat_idx = flattenMiscIndex(misc_reg); - else - flat_idx = misc_reg; - miscRegs[flat_idx] = val; + int flat_idx = flattenMiscIndex(misc_reg); // Note: indexes of AArch64 + // registers are left unchanged - DPRINTF(MiscRegs, "Writing to misc reg %d (%d) : %#x\n", misc_reg, - flat_idx, val); + int flat_idx2 = lookUpMiscReg[flat_idx].upper; + + if (flat_idx2 > 0) { + miscRegs[lookUpMiscReg[flat_idx].lower] = bits(val, 31, 0); + miscRegs[flat_idx2] = bits(val, 63, 32); + DPRINTF(MiscRegs, "Writing to misc reg %d (%d:%d) : %#x\n", + misc_reg, flat_idx, flat_idx2, val); + } else { + if (flat_idx == MISCREG_SPSR) + flat_idx = flattenMiscIndex(MISCREG_SPSR); + else if (flat_idx == MISCREG_SCTLR_EL1) + flat_idx = flattenMiscIndex(MISCREG_SCTLR); + else + flat_idx = (lookUpMiscReg[flat_idx].lower > 0) ? 
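setMiscRegNoEffect() here mirrors the read path: when a register has an upper mapping, the 64-bit value is split across the two 32-bit backing registers. The rule, restated as an illustrative helper:

    #include <cstdint>

    static void
    splitBanked(uint64_t val, uint32_t &lower, uint32_t &upper)
    {
        lower = (uint32_t) (val & 0xffffffff);  // bits(val, 31, 0)
        upper = (uint32_t) (val >> 32);         // bits(val, 63, 32)
    }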
+            lookUpMiscReg[flat_idx].lower : flat_idx;
+        miscRegs[flat_idx] = val;
+        DPRINTF(MiscRegs, "Writing to misc reg %d (%d) : %#x\n",
+                misc_reg, flat_idx, val);
+    }
 }
 
 void
@@ -317,8 +794,13 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc)
     MiscReg newVal = val;
     int x;
+    bool secure_lookup;
+    bool hyp;
     System *sys;
     ThreadContext *oc;
+    uint8_t target_el;
+    uint16_t asid;
+    SCR scr;
 
     if (misc_reg == MISCREG_CPSR) {
         updateRegMap(val);
@@ -346,12 +828,18 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc)
         } else {
             tc->pcState(pc);
         }
-    } else if (misc_reg >= MISCREG_CP15_UNIMP_START &&
-        misc_reg < MISCREG_CP15_END) {
-        panic("Unimplemented CP15 register %s wrote with %#x.\n",
-              miscRegName[misc_reg], val);
     } else {
-        switch (misc_reg) {
+#ifndef NDEBUG
+        if (!miscRegInfo[misc_reg][MISCREG_IMPLEMENTED]) {
+            if (miscRegInfo[misc_reg][MISCREG_WARN_NOT_FAIL])
+                warn("Unimplemented system register %s write with %#x.\n",
+                     miscRegName[misc_reg], val);
+            else
+                panic("Unimplemented system register %s write with %#x.\n",
+                      miscRegName[misc_reg], val);
+        }
+#endif
+        switch (unflattenMiscReg(misc_reg)) {
          case MISCREG_CPACR:
            {
@@ -362,7 +850,61 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc)
                cpacrMask.cp10 = ones;
                cpacrMask.cp11 = ones;
                cpacrMask.asedis = ones;
+
+                // Security Extensions may limit the writability of CPACR
+                if (haveSecurity) {
+                    scr = readMiscRegNoEffect(MISCREG_SCR);
+                    CPSR cpsr = readMiscRegNoEffect(MISCREG_CPSR);
+                    if (scr.ns && (cpsr.mode != MODE_MON)) {
+                        NSACR nsacr = readMiscRegNoEffect(MISCREG_NSACR);
+                        // NB: Skipping the full loop, here
+                        if (!nsacr.cp10) cpacrMask.cp10 = 0;
+                        if (!nsacr.cp11) cpacrMask.cp11 = 0;
+                    }
+                }
+
+                MiscReg old_val = readMiscRegNoEffect(MISCREG_CPACR);
                newVal &= cpacrMask;
+                newVal |= old_val & ~cpacrMask;
+                DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n",
+                        miscRegName[misc_reg], newVal);
+            }
+            break;
+          case MISCREG_CPACR_EL1:
+            {
+                const uint32_t ones = (uint32_t)(-1);
+                CPACR cpacrMask = 0;
+                cpacrMask.tta = ones;
+                cpacrMask.fpen = ones;
+                newVal &= cpacrMask;
+                DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n",
+                        miscRegName[misc_reg], newVal);
+            }
+            break;
+          case MISCREG_CPTR_EL2:
+            {
+                const uint32_t ones = (uint32_t)(-1);
+                CPTR cptrMask = 0;
+                cptrMask.tcpac = ones;
+                cptrMask.tta = ones;
+                cptrMask.tfp = ones;
+                newVal &= cptrMask;
+                cptrMask = 0;
+                cptrMask.res1_13_12_el2 = ones;
+                cptrMask.res1_9_0_el2 = ones;
+                newVal |= cptrMask;
+                DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n",
+                        miscRegName[misc_reg], newVal);
+            }
+            break;
+          case MISCREG_CPTR_EL3:
+            {
+                const uint32_t ones = (uint32_t)(-1);
+                CPTR cptrMask = 0;
+                cptrMask.tcpac = ones;
+                cptrMask.tta = ones;
+                cptrMask.tfp = ones;
+                newVal &= cptrMask;
                DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n",
                        miscRegName[misc_reg], newVal);
            }
@@ -370,6 +912,11 @@
          case MISCREG_CSSELR:
            warn_once("The csselr register isn't implemented.\n");
            return;
+
+          case MISCREG_DC_ZVA_Xt:
+            warn("Calling DC ZVA! Not Implemented!
Expect WEIRD results\n"); + return; + case MISCREG_FPSCR: { const uint32_t ones = (uint32_t)(-1); @@ -380,6 +927,12 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) fpscrMask.ufc = ones; fpscrMask.ixc = ones; fpscrMask.idc = ones; + fpscrMask.ioe = ones; + fpscrMask.dze = ones; + fpscrMask.ofe = ones; + fpscrMask.ufe = ones; + fpscrMask.ixe = ones; + fpscrMask.ide = ones; fpscrMask.len = ones; fpscrMask.stride = ones; fpscrMask.rMode = ones; @@ -392,26 +945,72 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) fpscrMask.z = ones; fpscrMask.n = ones; newVal = (newVal & (uint32_t)fpscrMask) | - (miscRegs[MISCREG_FPSCR] & ~(uint32_t)fpscrMask); + (readMiscRegNoEffect(MISCREG_FPSCR) & + ~(uint32_t)fpscrMask); tc->getDecoderPtr()->setContext(newVal); } break; + case MISCREG_FPSR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioc = ones; + fpscrMask.dzc = ones; + fpscrMask.ofc = ones; + fpscrMask.ufc = ones; + fpscrMask.ixc = ones; + fpscrMask.idc = ones; + fpscrMask.qc = ones; + fpscrMask.v = ones; + fpscrMask.c = ones; + fpscrMask.z = ones; + fpscrMask.n = ones; + newVal = (newVal & (uint32_t)fpscrMask) | + (readMiscRegNoEffect(MISCREG_FPSCR) & + ~(uint32_t)fpscrMask); + misc_reg = MISCREG_FPSCR; + } + break; + case MISCREG_FPCR: + { + const uint32_t ones = (uint32_t)(-1); + FPSCR fpscrMask = 0; + fpscrMask.ioe = ones; + fpscrMask.dze = ones; + fpscrMask.ofe = ones; + fpscrMask.ufe = ones; + fpscrMask.ixe = ones; + fpscrMask.ide = ones; + fpscrMask.len = ones; + fpscrMask.stride = ones; + fpscrMask.rMode = ones; + fpscrMask.fz = ones; + fpscrMask.dn = ones; + fpscrMask.ahp = ones; + newVal = (newVal & (uint32_t)fpscrMask) | + (readMiscRegNoEffect(MISCREG_FPSCR) & + ~(uint32_t)fpscrMask); + misc_reg = MISCREG_FPSCR; + } + break; case MISCREG_CPSR_Q: { assert(!(newVal & ~CpsrMaskQ)); - newVal = miscRegs[MISCREG_CPSR] | newVal; + newVal = readMiscRegNoEffect(MISCREG_CPSR) | newVal; misc_reg = MISCREG_CPSR; } break; case MISCREG_FPSCR_QC: { - newVal = miscRegs[MISCREG_FPSCR] | (newVal & FpscrQcMask); + newVal = readMiscRegNoEffect(MISCREG_FPSCR) | + (newVal & FpscrQcMask); misc_reg = MISCREG_FPSCR; } break; case MISCREG_FPSCR_EXC: { - newVal = miscRegs[MISCREG_FPSCR] | (newVal & FpscrExcMask); + newVal = readMiscRegNoEffect(MISCREG_FPSCR) | + (newVal & FpscrExcMask); misc_reg = MISCREG_FPSCR; } break; @@ -421,16 +1020,63 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc) // bit 29 - valid only if fpexc[31] is 0 const uint32_t fpexcMask = 0x60000000; newVal = (newVal & fpexcMask) | - (miscRegs[MISCREG_FPEXC] & ~fpexcMask); + (readMiscRegNoEffect(MISCREG_FPEXC) & ~fpexcMask); } break; + case MISCREG_HCR: + { + if (!haveVirtualization) + return; + } + break; + case MISCREG_IFSR: + { + // ARM ARM (ARM DDI 0406C.b) B4.1.96 + const uint32_t ifsrMask = + mask(31, 13) | mask(11, 11) | mask(8, 6); + newVal = newVal & ~ifsrMask; + } + break; + case MISCREG_DFSR: + { + // ARM ARM (ARM DDI 0406C.b) B4.1.52 + const uint32_t dfsrMask = mask(31, 14) | mask(8, 8); + newVal = newVal & ~dfsrMask; + } + break; + case MISCREG_AMAIR0: + case MISCREG_AMAIR1: + { + // ARM ARM (ARM DDI 0406C.b) B4.1.5 + // Valid only with LPAE + if (!haveLPAE) + return; + DPRINTF(MiscRegs, "Writing AMAIR: %#x\n", newVal); + } + break; + case MISCREG_SCR: + tc->getITBPtr()->invalidateMiscReg(); + tc->getDTBPtr()->invalidateMiscReg(); + break; case MISCREG_SCTLR: { DPRINTF(MiscRegs, "Writing SCTLR: %#x\n", newVal); - SCTLR sctlr = 
miscRegs[MISCREG_SCTLR];
+            MiscRegIndex sctlr_idx;
+            scr = readMiscRegNoEffect(MISCREG_SCR);
+            if (haveSecurity && !scr.ns) {
+                sctlr_idx = MISCREG_SCTLR_S;
+            } else {
+                sctlr_idx = MISCREG_SCTLR_NS;
+                // The FI field (bit 21) is common between S/NS versions
+                // of the register, we store this in the secure copy of
+                // the reg
+                miscRegs[MISCREG_SCTLR_S] &= ~(1 << 21);
+                miscRegs[MISCREG_SCTLR_S] |= newVal & (1 << 21);
+            }
+            SCTLR sctlr = miscRegs[sctlr_idx];
             SCTLR new_sctlr = newVal;
-            new_sctlr.nmfi = (bool)sctlr.nmfi;
-            miscRegs[MISCREG_SCTLR] = (MiscReg)new_sctlr;
+            new_sctlr.nmfi = ((bool)sctlr.nmfi) && !haveVirtualization;
+            miscRegs[sctlr_idx] = (MiscReg)new_sctlr;
             tc->getITBPtr()->invalidateMiscReg();
             tc->getDTBPtr()->invalidateMiscReg();
@@ -440,6 +1086,7 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc)
                 sys = tc->getSystemPtr();
                 for (x = 0; x < sys->numContexts(); x++) {
                     oc = sys->getThreadContext(x);
+                    // @todo: double check this for security
                     SCTLR other_sctlr = oc->readMiscRegNoEffect(MISCREG_SCTLR);
                     if (!other_sctlr.c && oc->status() != ThreadContext::Halted)
                         return;
@@ -479,96 +1126,317 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc)
          case MISCREG_TLBTR:
          case MISCREG_MVFR0:
          case MISCREG_MVFR1:
+
+          case MISCREG_ID_AA64AFR0_EL1:
+          case MISCREG_ID_AA64AFR1_EL1:
+          case MISCREG_ID_AA64DFR0_EL1:
+          case MISCREG_ID_AA64DFR1_EL1:
+          case MISCREG_ID_AA64ISAR0_EL1:
+          case MISCREG_ID_AA64ISAR1_EL1:
+          case MISCREG_ID_AA64MMFR0_EL1:
+          case MISCREG_ID_AA64MMFR1_EL1:
+          case MISCREG_ID_AA64PFR0_EL1:
+          case MISCREG_ID_AA64PFR1_EL1:
            // ID registers are constants.
            return;
 
+          // TLBI all entries, EL0&1 inner sharable (ignored)
          case MISCREG_TLBIALLIS:
-          case MISCREG_TLBIALL:
+          case MISCREG_TLBIALL:  // TLBI all entries, EL0&1,
+            assert32(tc);
+            target_el = 1;  // el 0 and 1 are handled together
+            scr = readMiscReg(MISCREG_SCR, tc);
+            secure_lookup = haveSecurity && !scr.ns;
            sys = tc->getSystemPtr();
            for (x = 0; x < sys->numContexts(); x++) {
                oc = sys->getThreadContext(x);
                assert(oc->getITBPtr() && oc->getDTBPtr());
-                oc->getITBPtr()->flushAll();
-                oc->getDTBPtr()->flushAll();
+                oc->getITBPtr()->flushAllSecurity(secure_lookup, target_el);
+                oc->getDTBPtr()->flushAllSecurity(secure_lookup, target_el);
 
                // If CheckerCPU is connected, need to notify it of a flush
                CheckerCPU *checker = oc->getCheckerCpuPtr();
                if (checker) {
-                    checker->getITBPtr()->flushAll();
-                    checker->getDTBPtr()->flushAll();
+                    checker->getITBPtr()->flushAllSecurity(secure_lookup,
+                                                           target_el);
+                    checker->getDTBPtr()->flushAllSecurity(secure_lookup,
+                                                           target_el);
                }
            }
            return;
+          // TLBI all entries, EL0&1, instruction side
          case MISCREG_ITLBIALL:
-            tc->getITBPtr()->flushAll();
+            assert32(tc);
+            target_el = 1;  // el 0 and 1 are handled together
+            scr = readMiscReg(MISCREG_SCR, tc);
+            secure_lookup = haveSecurity && !scr.ns;
+            tc->getITBPtr()->flushAllSecurity(secure_lookup, target_el);
            return;
+          // TLBI all entries, EL0&1, data side
          case MISCREG_DTLBIALL:
-            tc->getDTBPtr()->flushAll();
+            assert32(tc);
+            target_el = 1;  // el 0 and 1 are handled together
+            scr = readMiscReg(MISCREG_SCR, tc);
+            secure_lookup = haveSecurity && !scr.ns;
+            tc->getDTBPtr()->flushAllSecurity(secure_lookup, target_el);
            return;
+          // TLBI based on VA, EL0&1 inner sharable (ignored)
          case MISCREG_TLBIMVAIS:
          case MISCREG_TLBIMVA:
+            assert32(tc);
+            target_el = 1;  // el 0 and 1 are handled together
+            scr = readMiscReg(MISCREG_SCR, tc);
+            secure_lookup = haveSecurity && !scr.ns;
            sys = tc->getSystemPtr();
            for (x = 0; x <
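Each of the TLBI cases in this region repeats the same broadcast pattern: walk every thread context, flush its instruction and data TLBs, and mirror the flush into any attached CheckerCPU. A possible refactoring of that pattern (hypothetical helper; the patch itself spells the loop out each time):

    // Sketch only: System, ThreadContext and CheckerCPU are the gem5
    // types used in the loops above.
    template <typename FlushFn>
    static void
    forEachTlb(System *sys, FlushFn flush)
    {
        for (int x = 0; x < sys->numContexts(); x++) {
            ThreadContext *oc = sys->getThreadContext(x);
            flush(oc->getITBPtr());
            flush(oc->getDTBPtr());
            if (CheckerCPU *checker = oc->getCheckerCpuPtr()) {
                flush(checker->getITBPtr());
                flush(checker->getDTBPtr());
            }
        }
    }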
sys->numContexts(); x++) {
                oc = sys->getThreadContext(x);
                assert(oc->getITBPtr() && oc->getDTBPtr());
                oc->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12),
-                                              bits(newVal, 7,0));
+                                              bits(newVal, 7,0),
+                                              secure_lookup, target_el);
                oc->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12),
-                                              bits(newVal, 7,0));
+                                              bits(newVal, 7,0),
+                                              secure_lookup, target_el);
 
                CheckerCPU *checker = oc->getCheckerCpuPtr();
                if (checker) {
                    checker->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12),
-                        bits(newVal, 7,0));
+                        bits(newVal, 7,0), secure_lookup, target_el);
                    checker->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12),
-                        bits(newVal, 7,0));
+                        bits(newVal, 7,0), secure_lookup, target_el);
                }
            }
            return;
+          // TLBI by ASID, EL0&1, inner sharable
          case MISCREG_TLBIASIDIS:
          case MISCREG_TLBIASID:
+            assert32(tc);
+            target_el = 1;  // el 0 and 1 are handled together
+            scr = readMiscReg(MISCREG_SCR, tc);
+            secure_lookup = haveSecurity && !scr.ns;
            sys = tc->getSystemPtr();
            for (x = 0; x < sys->numContexts(); x++) {
                oc = sys->getThreadContext(x);
                assert(oc->getITBPtr() && oc->getDTBPtr());
-                oc->getITBPtr()->flushAsid(bits(newVal, 7,0));
-                oc->getDTBPtr()->flushAsid(bits(newVal, 7,0));
+                oc->getITBPtr()->flushAsid(bits(newVal, 7,0),
+                    secure_lookup, target_el);
+                oc->getDTBPtr()->flushAsid(bits(newVal, 7,0),
+                    secure_lookup, target_el);
                CheckerCPU *checker = oc->getCheckerCpuPtr();
                if (checker) {
-                    checker->getITBPtr()->flushAsid(bits(newVal, 7,0));
-                    checker->getDTBPtr()->flushAsid(bits(newVal, 7,0));
+                    checker->getITBPtr()->flushAsid(bits(newVal, 7,0),
+                        secure_lookup, target_el);
+                    checker->getDTBPtr()->flushAsid(bits(newVal, 7,0),
+                        secure_lookup, target_el);
                }
            }
            return;
+          // TLBI by address, EL0&1, inner sharable (ignored)
          case MISCREG_TLBIMVAAIS:
          case MISCREG_TLBIMVAA:
+            assert32(tc);
+            target_el = 1;  // el 0 and 1 are handled together
+            scr = readMiscReg(MISCREG_SCR, tc);
+            secure_lookup = haveSecurity && !scr.ns;
+            hyp = 0;
+            tlbiMVA(tc, newVal, secure_lookup, hyp, target_el);
+            return;
+          // TLBI by address, EL2, hypervisor mode
+          case MISCREG_TLBIMVAH:
+          case MISCREG_TLBIMVAHIS:
+            assert32(tc);
+            target_el = 1;  // aarch32, use hyp bit
+            scr = readMiscReg(MISCREG_SCR, tc);
+            secure_lookup = haveSecurity && !scr.ns;
+            hyp = 1;
+            tlbiMVA(tc, newVal, secure_lookup, hyp, target_el);
+            return;
+          // TLBI by address and asid, EL0&1, instruction side only
+          case MISCREG_ITLBIMVA:
+            assert32(tc);
+            target_el = 1;  // el 0 and 1 are handled together
+            scr = readMiscReg(MISCREG_SCR, tc);
+            secure_lookup = haveSecurity && !scr.ns;
+            tc->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12),
+                bits(newVal, 7,0), secure_lookup, target_el);
+            return;
+          // TLBI by address and asid, EL0&1, data side only
+          case MISCREG_DTLBIMVA:
+            assert32(tc);
+            target_el = 1;  // el 0 and 1 are handled together
+            scr = readMiscReg(MISCREG_SCR, tc);
+            secure_lookup = haveSecurity && !scr.ns;
+            tc->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12),
+                bits(newVal, 7,0), secure_lookup, target_el);
+            return;
+          // TLBI by ASID, EL0&1, instruction side only
+          case MISCREG_ITLBIASID:
+            assert32(tc);
+            target_el = 1;  // el 0 and 1 are handled together
+            scr = readMiscReg(MISCREG_SCR, tc);
+            secure_lookup = haveSecurity && !scr.ns;
+            tc->getITBPtr()->flushAsid(bits(newVal, 7,0), secure_lookup,
+                target_el);
+            return;
+          // TLBI by ASID, EL0&1, data side only
+          case MISCREG_DTLBIASID:
+            assert32(tc);
+            target_el = 1;  // el 0 and 1 are handled together
+            scr = readMiscReg(MISCREG_SCR, tc);
+            secure_lookup = haveSecurity && !scr.ns;
+            tc->getDTBPtr()->flushAsid(bits(newVal, 7,0),
secure_lookup, + target_el); + return; + // Invalidate entire Non-secure Hyp/Non-Hyp Unified TLB + case MISCREG_TLBIALLNSNH: + case MISCREG_TLBIALLNSNHIS: + assert32(tc); + target_el = 1; // el 0 and 1 are handled together + hyp = 0; + tlbiALLN(tc, hyp, target_el); + return; + // TLBI all entries, EL2, hyp, + case MISCREG_TLBIALLH: + case MISCREG_TLBIALLHIS: + assert32(tc); + target_el = 1; // aarch32, use hyp bit + hyp = 1; + tlbiALLN(tc, hyp, target_el); + return; + // AArch64 TLBI: invalidate all entries EL3 + case MISCREG_TLBI_ALLE3IS: + case MISCREG_TLBI_ALLE3: + assert64(tc); + target_el = 3; + secure_lookup = true; + tlbiALL(tc, secure_lookup, target_el); + return; + // @todo: uncomment this to enable Virtualization + // case MISCREG_TLBI_ALLE2IS: + // case MISCREG_TLBI_ALLE2: + // TLBI all entries, EL0&1 + case MISCREG_TLBI_ALLE1IS: + case MISCREG_TLBI_ALLE1: + // AArch64 TLBI: invalidate all entries, stage 1, current VMID + case MISCREG_TLBI_VMALLE1IS: + case MISCREG_TLBI_VMALLE1: + // AArch64 TLBI: invalidate all entries, stages 1 & 2, current VMID + case MISCREG_TLBI_VMALLS12E1IS: + case MISCREG_TLBI_VMALLS12E1: + // @todo: handle VMID and stage 2 to enable Virtualization + assert64(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tlbiALL(tc, secure_lookup, target_el); + return; + // AArch64 TLBI: invalidate by VA and ASID, stage 1, current VMID + // VAEx(IS) and VALEx(IS) are the same because TLBs only store entries + // from the last level of translation table walks + // @todo: handle VMID to enable Virtualization + // TLBI all entries, EL0&1 + case MISCREG_TLBI_VAE3IS_Xt: + case MISCREG_TLBI_VAE3_Xt: + // TLBI by VA, EL3 regime stage 1, last level walk + case MISCREG_TLBI_VALE3IS_Xt: + case MISCREG_TLBI_VALE3_Xt: + assert64(tc); + target_el = 3; + asid = 0xbeef; // does not matter, tlbi is global + secure_lookup = true; + tlbiVA(tc, newVal, asid, secure_lookup, target_el); + return; + // TLBI by VA, EL2 + case MISCREG_TLBI_VAE2IS_Xt: + case MISCREG_TLBI_VAE2_Xt: + // TLBI by VA, EL2, stage1 last level walk + case MISCREG_TLBI_VALE2IS_Xt: + case MISCREG_TLBI_VALE2_Xt: + assert64(tc); + target_el = 2; + asid = 0xbeef; // does not matter, tlbi is global + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tlbiVA(tc, newVal, asid, secure_lookup, target_el); + return; + // TLBI by VA EL1 & 0, stage1, ASID, current VMID + case MISCREG_TLBI_VAE1IS_Xt: + case MISCREG_TLBI_VAE1_Xt: + case MISCREG_TLBI_VALE1IS_Xt: + case MISCREG_TLBI_VALE1_Xt: + assert64(tc); + asid = bits(newVal, 63, 48); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; + tlbiVA(tc, newVal, asid, secure_lookup, target_el); + return; + // AArch64 TLBI: invalidate by ASID, stage 1, current VMID + // @todo: handle VMID to enable Virtualization + case MISCREG_TLBI_ASIDE1IS_Xt: + case MISCREG_TLBI_ASIDE1_Xt: + assert64(tc); + target_el = 1; // el 0 and 1 are handled together + scr = readMiscReg(MISCREG_SCR, tc); + secure_lookup = haveSecurity && !scr.ns; sys = tc->getSystemPtr(); for (x = 0; x < sys->numContexts(); x++) { oc = sys->getThreadContext(x); assert(oc->getITBPtr() && oc->getDTBPtr()); - oc->getITBPtr()->flushMva(mbits(newVal, 31,12)); - oc->getDTBPtr()->flushMva(mbits(newVal, 31,12)); + asid = bits(newVal, 63, 48); + if (haveLargeAsid64) + asid &= mask(8); + oc->getITBPtr()->flushAsid(asid, secure_lookup, 
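For the AArch64 TLBI-by-VA operations above, the Xt payload packs both operands: the ASID lives in bits [63:48] and the 4 KB-aligned VA in bits [43:0], shifted up by 12. A sketch of the unpacking those cases perform (illustrative; bits(), mask() and Addr are the gem5 utilities used in the patch):

    struct TlbiVaArgs { Addr va; uint16_t asid; };

    static TlbiVaArgs
    unpackTlbiXt(uint64_t xt, bool haveLargeAsid64)
    {
        TlbiVaArgs a;
        a.asid = bits(xt, 63, 48);
        if (!haveLargeAsid64)
            a.asid &= mask(8);                  // 8-bit ASIDs without LargeASID
        a.va = ((Addr) bits(xt, 43, 0)) << 12;  // page-aligned virtual address
        return a;
    }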
target_el);
+                oc->getDTBPtr()->flushAsid(asid, secure_lookup, target_el);
+                CheckerCPU *checker = oc->getCheckerCpuPtr();
+                if (checker) {
+                    checker->getITBPtr()->flushAsid(asid,
+                        secure_lookup, target_el);
+                    checker->getDTBPtr()->flushAsid(asid,
+                        secure_lookup, target_el);
+                }
+            }
+            return;
+          // AArch64 TLBI: invalidate by VA, ASID, stage 1, current VMID
+          // VAAE1(IS) and VAALE1(IS) are the same because TLBs only store
+          // entries from the last level of translation table walks
+          // @todo: handle VMID to enable Virtualization
+          case MISCREG_TLBI_VAAE1IS_Xt:
+          case MISCREG_TLBI_VAAE1_Xt:
+          case MISCREG_TLBI_VAALE1IS_Xt:
+          case MISCREG_TLBI_VAALE1_Xt:
+            assert64(tc);
+            target_el = 1;  // el 0 and 1 are handled together
+            scr = readMiscReg(MISCREG_SCR, tc);
+            secure_lookup = haveSecurity && !scr.ns;
+            sys = tc->getSystemPtr();
+            for (x = 0; x < sys->numContexts(); x++) {
+                // @todo: extra controls on TLBI broadcast?
+                oc = sys->getThreadContext(x);
+                assert(oc->getITBPtr() && oc->getDTBPtr());
+                Addr va = ((Addr) bits(newVal, 43, 0)) << 12;
+                oc->getITBPtr()->flushMva(va,
+                    secure_lookup, false, target_el);
+                oc->getDTBPtr()->flushMva(va,
+                    secure_lookup, false, target_el);
                CheckerCPU *checker = oc->getCheckerCpuPtr();
                if (checker) {
-                    checker->getITBPtr()->flushMva(mbits(newVal, 31,12));
-                    checker->getDTBPtr()->flushMva(mbits(newVal, 31,12));
+                    checker->getITBPtr()->flushMva(va,
+                        secure_lookup, false, target_el);
+                    checker->getDTBPtr()->flushMva(va,
+                        secure_lookup, false, target_el);
                }
            }
            return;
-        case MISCREG_ITLBIMVA:
-            tc->getITBPtr()->flushMvaAsid(mbits(newVal, 31, 12),
-                                          bits(newVal, 7,0));
-            return;
-        case MISCREG_DTLBIMVA:
-            tc->getDTBPtr()->flushMvaAsid(mbits(newVal, 31, 12),
-                                          bits(newVal, 7,0));
-            return;
-        case MISCREG_ITLBIASID:
-            tc->getITBPtr()->flushAsid(bits(newVal, 7,0));
-            return;
-        case MISCREG_DTLBIASID:
-            tc->getDTBPtr()->flushAsid(bits(newVal, 7,0));
+          // AArch64 TLBI: invalidate by IPA, stage 2, current VMID
+          case MISCREG_TLBI_IPAS2LE1IS_Xt:
+          case MISCREG_TLBI_IPAS2LE1_Xt:
+          case MISCREG_TLBI_IPAS2E1IS_Xt:
+          case MISCREG_TLBI_IPAS2E1_Xt:
+            assert64(tc);
+            // @todo: implement these as part of Virtualization
+            warn("Not doing anything for write of miscreg ITLB_IPAS2\n");
            return;
          case MISCREG_ACTLR:
            warn("Not doing anything for write of miscreg ACTLR\n");
@@ -591,77 +1459,566 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc)
            warn("Not doing anything for write to miscreg %s\n",
                 miscRegName[misc_reg]);
            break;
-        case MISCREG_V2PCWPR:
-        case MISCREG_V2PCWPW:
-        case MISCREG_V2PCWUR:
-        case MISCREG_V2PCWUW:
-        case MISCREG_V2POWPR:
-        case MISCREG_V2POWPW:
-        case MISCREG_V2POWUR:
-        case MISCREG_V2POWUW:
+          case MISCREG_HSTR:  // TJDBX, now redefined to be RES0
+            {
+                HSTR hstrMask = 0;
+                hstrMask.tjdbx = 1;
+                newVal &= ~((uint32_t) hstrMask);
+                break;
+            }
+          case MISCREG_HCPTR:
+            {
+                // If a CP bit in NSACR is 0 then the corresponding bit in
+                // HCPTR is RAO/WI.
Same applies to NSASEDIS + secure_lookup = haveSecurity && + inSecureState(readMiscRegNoEffect(MISCREG_SCR), + readMiscRegNoEffect(MISCREG_CPSR)); + if (!secure_lookup) { + MiscReg oldValue = readMiscRegNoEffect(MISCREG_HCPTR); + MiscReg mask = (readMiscRegNoEffect(MISCREG_NSACR) ^ 0x7FFF) & 0xBFFF; + newVal = (newVal & ~mask) | (oldValue & mask); + } + break; + } + case MISCREG_HDFAR: // alias for secure DFAR + misc_reg = MISCREG_DFAR_S; + break; + case MISCREG_HIFAR: // alias for secure IFAR + misc_reg = MISCREG_IFAR_S; + break; + case MISCREG_ATS1CPR: + case MISCREG_ATS1CPW: + case MISCREG_ATS1CUR: + case MISCREG_ATS1CUW: + case MISCREG_ATS12NSOPR: + case MISCREG_ATS12NSOPW: + case MISCREG_ATS12NSOUR: + case MISCREG_ATS12NSOUW: + case MISCREG_ATS1HR: + case MISCREG_ATS1HW: { RequestPtr req = new Request; - unsigned flags; - BaseTLB::Mode mode; + unsigned flags = 0; + BaseTLB::Mode mode = BaseTLB::Read; + TLB::ArmTranslationType tranType = TLB::NormalTran; Fault fault; switch(misc_reg) { - case MISCREG_V2PCWPR: - flags = TLB::MustBeOne; - mode = BaseTLB::Read; - break; - case MISCREG_V2PCWPW: - flags = TLB::MustBeOne; - mode = BaseTLB::Write; - break; - case MISCREG_V2PCWUR: - flags = TLB::MustBeOne | TLB::UserMode; - mode = BaseTLB::Read; - break; - case MISCREG_V2PCWUW: - flags = TLB::MustBeOne | TLB::UserMode; - mode = BaseTLB::Write; - break; - default: - panic("Security Extensions not implemented!"); + case MISCREG_ATS1CPR: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS1CPW: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS1CUR: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS1CUW: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS12NSOPR: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOPR"); + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS12NSOPW: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOPW"); + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS12NSOUR: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOUR"); + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_ATS12NSOUW: + if (!haveSecurity) + panic("Security Extensions required for ATS12NSOUW"); + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_ATS1HR: // only really useful from secure mode. + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Read; + break; + case MISCREG_ATS1HW: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Write; + break; } - warn("Translating via MISCREG in atomic mode! Fix Me!\n"); - req->setVirt(0, val, 1, flags, tc->pcState().pc(), - Request::funcMasterId); - fault = tc->getDTBPtr()->translateAtomic(req, tc, mode); + // If we're in timing mode then doing the translation in + // functional mode then we're slightly distorting performance + // results obtained from simulations. The translation should be + // done in the same mode the core is running in. NOTE: This + // can't be an atomic translation because that causes problems + // with unexpected atomic snoop requests. 
+ warn("Translating via MISCREG(%d) in functional mode! Fix Me!\n", misc_reg); + req->setVirt(0, val, 1, flags, Request::funcMasterId, + tc->pcState().pc()); + req->setThreadContext(tc->contextId(), tc->threadId()); + fault = tc->getDTBPtr()->translateFunctional(req, tc, mode, tranType); + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + HCR hcr = readMiscRegNoEffect(MISCREG_HCR); + + MiscReg newVal; if (fault == NoFault) { - miscRegs[MISCREG_PAR] = - (req->getPaddr() & 0xfffff000) | - (tc->getDTBPtr()->getAttr() ); + Addr paddr = req->getPaddr(); + if (haveLPAE && (ttbcr.eae || tranType & TLB::HypMode || + ((tranType & TLB::S1S2NsTran) && hcr.vm) )) { + newVal = (paddr & mask(39, 12)) | + (tc->getDTBPtr()->getAttr()); + } else { + newVal = (paddr & 0xfffff000) | + (tc->getDTBPtr()->getAttr()); + } DPRINTF(MiscRegs, "MISCREG: Translated addr 0x%08x: PAR: 0x%08x\n", - val, miscRegs[MISCREG_PAR]); - } - else { + val, newVal); + } else { + ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get()); // Set fault bit and FSR - FSR fsr = miscRegs[MISCREG_DFSR]; - miscRegs[MISCREG_PAR] = - (fsr.ext << 6) | - (fsr.fsHigh << 5) | - (fsr.fsLow << 1) | - 0x1; // F bit + FSR fsr = armFault->getFsr(tc); + + newVal = ((fsr >> 9) & 1) << 11; + if (newVal) { + // LPAE - rearrange fault status + newVal |= ((fsr >> 0) & 0x3f) << 1; + } else { + // VMSA - rearrange fault status + newVal |= ((fsr >> 0) & 0xf) << 1; + newVal |= ((fsr >> 10) & 0x1) << 5; + newVal |= ((fsr >> 12) & 0x1) << 6; + } + newVal |= 0x1; // F bit + newVal |= ((armFault->iss() >> 7) & 0x1) << 8; + newVal |= armFault->isStage2() ? 0x200 : 0; + DPRINTF(MiscRegs, + "MISCREG: Translated addr 0x%08x fault fsr %#x: PAR: 0x%08x\n", + val, fsr, newVal); } + delete req; + setMiscRegNoEffect(MISCREG_PAR, newVal); return; } + case MISCREG_TTBCR: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + const uint32_t ones = (uint32_t)(-1); + TTBCR ttbcrMask = 0; + TTBCR ttbcrNew = newVal; + + // ARM DDI 0406C.b, ARMv7-32 + ttbcrMask.n = ones; // T0SZ + if (haveSecurity) { + ttbcrMask.pd0 = ones; + ttbcrMask.pd1 = ones; + } + ttbcrMask.epd0 = ones; + ttbcrMask.irgn0 = ones; + ttbcrMask.orgn0 = ones; + ttbcrMask.sh0 = ones; + ttbcrMask.ps = ones; // T1SZ + ttbcrMask.a1 = ones; + ttbcrMask.epd1 = ones; + ttbcrMask.irgn1 = ones; + ttbcrMask.orgn1 = ones; + ttbcrMask.sh1 = ones; + if (haveLPAE) + ttbcrMask.eae = ones; + + if (haveLPAE && ttbcrNew.eae) { + newVal = newVal & ttbcrMask; + } else { + newVal = (newVal & ttbcrMask) | (ttbcr & (~ttbcrMask)); + } + } + case MISCREG_TTBR0: + case MISCREG_TTBR1: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + if (haveLPAE) { + if (ttbcr.eae) { + // ARMv7 bit 63-56, 47-40 reserved, UNK/SBZP + // ARMv8 AArch32 bit 63-56 only + uint64_t ttbrMask = mask(63,56) | mask(47,40); + newVal = (newVal & (~ttbrMask)); + } + } + } case MISCREG_CONTEXTIDR: case MISCREG_PRRR: case MISCREG_NMRR: + case MISCREG_MAIR0: + case MISCREG_MAIR1: case MISCREG_DACR: + case MISCREG_VTTBR: + case MISCREG_SCR_EL3: + case MISCREG_SCTLR_EL1: + case MISCREG_SCTLR_EL2: + case MISCREG_SCTLR_EL3: + case MISCREG_TCR_EL1: + case MISCREG_TCR_EL2: + case MISCREG_TCR_EL3: + case MISCREG_TTBR0_EL1: + case MISCREG_TTBR1_EL1: + case MISCREG_TTBR0_EL2: + case MISCREG_TTBR0_EL3: tc->getITBPtr()->invalidateMiscReg(); tc->getDTBPtr()->invalidateMiscReg(); break; + case MISCREG_NZCV: + { + CPSR cpsr = val; + + tc->setIntReg(INTREG_CONDCODES_NZ, cpsr.nz); + tc->setIntReg(INTREG_CONDCODES_C, cpsr.c); + tc->setIntReg(INTREG_CONDCODES_V, cpsr.v); + } + break; +
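On a fault, the AT handlers earlier in this function repack the FSR into PAR, with different layouts for the long-descriptor (LPAE) and short-descriptor formats. The bit shuffling is easier to check in isolation; a self-contained sketch of the same encoding (the function name is illustrative):

    #include <cstdint>

    // Illustrative: the PAR fault encoding produced by the AT handlers above.
    uint32_t
    parFromFsr(uint32_t fsr, uint32_t iss, bool stage2)
    {
        const bool lpae = (fsr >> 9) & 0x1;  // FSR.LPAE selects the layout
        uint32_t par = lpae ? (1u << 11) : 0;
        if (lpae) {
            par |= ((fsr >> 0) & 0x3f) << 1; // long format: fsr[5:0] -> par[6:1]
        } else {
            par |= ((fsr >> 0) & 0xf) << 1;  // short format: fsr[3:0] -> par[4:1]
            par |= ((fsr >> 10) & 0x1) << 5; //               fsr[10]  -> par[5]
            par |= ((fsr >> 12) & 0x1) << 6; //               fsr[12]  -> par[6]
        }
        par |= 0x1;                          // F bit: the translation aborted
        par |= ((iss >> 7) & 0x1) << 8;      // S1PTW bit from the fault ISS
        par |= stage2 ? 0x200 : 0;           // abort came from stage 2
        return par;
    }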
case MISCREG_DAIF: + { + CPSR cpsr = miscRegs[MISCREG_CPSR]; + cpsr.daif = (uint8_t) ((CPSR) newVal).daif; + newVal = cpsr; + misc_reg = MISCREG_CPSR; + } + break; + case MISCREG_SP_EL0: + tc->setIntReg(INTREG_SP0, newVal); + break; + case MISCREG_SP_EL1: + tc->setIntReg(INTREG_SP1, newVal); + break; + case MISCREG_SP_EL2: + tc->setIntReg(INTREG_SP2, newVal); + break; + case MISCREG_SPSEL: + { + CPSR cpsr = miscRegs[MISCREG_CPSR]; + cpsr.sp = (uint8_t) ((CPSR) newVal).sp; + newVal = cpsr; + misc_reg = MISCREG_CPSR; + } + break; + case MISCREG_CURRENTEL: + { + CPSR cpsr = miscRegs[MISCREG_CPSR]; + cpsr.el = (uint8_t) ((CPSR) newVal).el; + newVal = cpsr; + misc_reg = MISCREG_CPSR; + } + break; + case MISCREG_AT_S1E1R_Xt: + case MISCREG_AT_S1E1W_Xt: + case MISCREG_AT_S1E0R_Xt: + case MISCREG_AT_S1E0W_Xt: + case MISCREG_AT_S1E2R_Xt: + case MISCREG_AT_S1E2W_Xt: + case MISCREG_AT_S12E1R_Xt: + case MISCREG_AT_S12E1W_Xt: + case MISCREG_AT_S12E0R_Xt: + case MISCREG_AT_S12E0W_Xt: + case MISCREG_AT_S1E3R_Xt: + case MISCREG_AT_S1E3W_Xt: + { + RequestPtr req = new Request; + unsigned flags = 0; + BaseTLB::Mode mode = BaseTLB::Read; + TLB::ArmTranslationType tranType = TLB::NormalTran; + Fault fault; + switch(misc_reg) { + case MISCREG_AT_S1E1R_Xt: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S1E1W_Xt: + flags = TLB::MustBeOne; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S1E0R_Xt: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S1E0W_Xt: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1CTran; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S1E2R_Xt: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S1E2W_Xt: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S12E0R_Xt: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S12E0W_Xt: + flags = TLB::MustBeOne | TLB::UserMode; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S12E1R_Xt: + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Read; + break; + case MISCREG_AT_S12E1W_Xt: + flags = TLB::MustBeOne; + tranType = TLB::S1S2NsTran; + mode = BaseTLB::Write; + break; + case MISCREG_AT_S1E3R_Xt: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; // There is no TZ mode defined. + mode = BaseTLB::Read; + break; + case MISCREG_AT_S1E3W_Xt: + flags = TLB::MustBeOne; + tranType = TLB::HypMode; // There is no TZ mode defined. + mode = BaseTLB::Write; + break; + } + // If we're in timing mode then doing the translation in + // functional mode slightly distorts the performance + // results obtained from simulations. The translation should be + // done in the same mode the core is running in. NOTE: This + // can't be an atomic translation because that causes problems + // with unexpected atomic snoop requests. + warn("Translating via MISCREG(%d) in functional mode!
Fix Me!\n", misc_reg); + req->setVirt(0, val, 1, flags, Request::funcMasterId, + tc->pcState().pc()); + req->setThreadContext(tc->contextId(), tc->threadId()); + fault = tc->getDTBPtr()->translateFunctional(req, tc, mode, + tranType); + + MiscReg newVal; + if (fault == NoFault) { + Addr paddr = req->getPaddr(); + uint64_t attr = tc->getDTBPtr()->getAttr(); + uint64_t attr1 = attr >> 56; + if (!attr1 || attr1 == 0x44) { + attr |= 0x100; + attr &= ~ uint64_t(0x80); + } + newVal = (paddr & mask(47, 12)) | attr; + DPRINTF(MiscRegs, + "MISCREG: Translated addr %#x: PAR_EL1: %#x\n", + val, newVal); + } else { + ArmFault *armFault = reinterpret_cast<ArmFault *>(fault.get()); + // Set fault bit and FSR + FSR fsr = armFault->getFsr(tc); + + newVal = ((fsr >> 9) & 1) << 11; + // rearrange fault status + newVal |= ((fsr >> 0) & 0x3f) << 1; + newVal |= 0x1; // F bit + newVal |= ((armFault->iss() >> 7) & 0x1) << 8; + newVal |= armFault->isStage2() ? 0x200 : 0; + DPRINTF(MiscRegs, + "MISCREG: Translated addr %#x fault fsr %#x: PAR: %#x\n", + val, fsr, newVal); + } + delete req; + setMiscRegNoEffect(MISCREG_PAR_EL1, newVal); + return; + } + case MISCREG_SPSR_EL3: + case MISCREG_SPSR_EL2: + case MISCREG_SPSR_EL1: + // Force bits 23:21 to 0 + newVal = val & ~(0x7 << 21); + break; case MISCREG_L2CTLR: warn("miscreg L2CTLR (%s) written with %#x. ignored...\n", miscRegName[misc_reg], uint32_t(val)); + break; + + // Generic Timer registers + case MISCREG_CNTFRQ: + case MISCREG_CNTFRQ_EL0: + getSystemCounter(tc)->setFreq(val); + break; + case MISCREG_CNTP_CVAL: + case MISCREG_CNTP_CVAL_EL0: + getArchTimer(tc, tc->cpuId())->setCompareValue(val); + break; + case MISCREG_CNTP_TVAL: + case MISCREG_CNTP_TVAL_EL0: + getArchTimer(tc, tc->cpuId())->setTimerValue(val); + break; + case MISCREG_CNTP_CTL: + case MISCREG_CNTP_CTL_EL0: + getArchTimer(tc, tc->cpuId())->setControl(val); + break; + // PL1 phys. timer, secure + // AArch64 + case MISCREG_CNTPS_CVAL_EL1: + case MISCREG_CNTPS_TVAL_EL1: + case MISCREG_CNTPS_CTL_EL1: + // PL2 phys.
timer, non-secure + // AArch32 + case MISCREG_CNTHCTL: + case MISCREG_CNTHP_CVAL: + case MISCREG_CNTHP_TVAL: + case MISCREG_CNTHP_CTL: + // AArch64 + case MISCREG_CNTHCTL_EL2: + case MISCREG_CNTHP_CVAL_EL2: + case MISCREG_CNTHP_TVAL_EL2: + case MISCREG_CNTHP_CTL_EL2: + // Virtual timer + // AArch32 + case MISCREG_CNTV_CVAL: + case MISCREG_CNTV_TVAL: + case MISCREG_CNTV_CTL: + // AArch64 + // case MISCREG_CNTV_CVAL_EL2: + // case MISCREG_CNTV_TVAL_EL2: + // case MISCREG_CNTV_CTL_EL2: + panic("Generic Timer register not implemented\n"); + break; } } setMiscRegNoEffect(misc_reg, newVal); } +void +ISA::tlbiVA(ThreadContext *tc, MiscReg newVal, uint8_t asid, bool secure_lookup, + uint8_t target_el) +{ + if (haveLargeAsid64) + asid &= mask(8); + Addr va = ((Addr) bits(newVal, 43, 0)) << 12; + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushMvaAsid(va, asid, + secure_lookup, target_el); + oc->getDTBPtr()->flushMvaAsid(va, asid, + secure_lookup, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushMvaAsid( + va, asid, secure_lookup, target_el); + checker->getDTBPtr()->flushMvaAsid( + va, asid, secure_lookup, target_el); + } + } +} + +void +ISA::tlbiALL(ThreadContext *tc, bool secure_lookup, uint8_t target_el) +{ + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushAllSecurity(secure_lookup, target_el); + oc->getDTBPtr()->flushAllSecurity(secure_lookup, target_el); + + // If CheckerCPU is connected, need to notify it of a flush + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushAllSecurity(secure_lookup, + target_el); + checker->getDTBPtr()->flushAllSecurity(secure_lookup, + target_el); + } + } +} + +void +ISA::tlbiALLN(ThreadContext *tc, bool hyp, uint8_t target_el) +{ + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushAllNs(hyp, target_el); + oc->getDTBPtr()->flushAllNs(hyp, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushAllNs(hyp, target_el); + checker->getDTBPtr()->flushAllNs(hyp, target_el); + } + } +} + +void +ISA::tlbiMVA(ThreadContext *tc, MiscReg newVal, bool secure_lookup, bool hyp, + uint8_t target_el) +{ + System *sys = tc->getSystemPtr(); + for (int x = 0; x < sys->numContexts(); x++) { + ThreadContext *oc = sys->getThreadContext(x); + assert(oc->getITBPtr() && oc->getDTBPtr()); + oc->getITBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + oc->getDTBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + + CheckerCPU *checker = oc->getCheckerCpuPtr(); + if (checker) { + checker->getITBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + checker->getDTBPtr()->flushMva(mbits(newVal, 31,12), + secure_lookup, hyp, target_el); + } + } +} + +::GenericTimer::SystemCounter * +ISA::getSystemCounter(ThreadContext *tc) +{ + ::GenericTimer::SystemCounter *cnt = ((ArmSystem *) tc->getSystemPtr())-> + getSystemCounter(); + if (cnt == NULL) { + panic("System counter not available\n"); + } + return cnt; +} + +::GenericTimer::ArchTimer * 
+ISA::getArchTimer(ThreadContext *tc, int cpu_id) +{ + ::GenericTimer::ArchTimer *timer = ((ArmSystem *) tc->getSystemPtr())-> + getArchTimer(cpu_id); + if (timer == NULL) { + panic("Architected timer not available\n"); + } + return timer; +} + } ArmISA::ISA * diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index c747fc770..c72d5d50f 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 ARM Limited + * Copyright (c) 2010, 2012-2013 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,9 +44,11 @@ #define __ARCH_ARM_ISA_HH__ #include "arch/arm/registers.hh" +#include "arch/arm/system.hh" #include "arch/arm/tlb.hh" #include "arch/arm/types.hh" #include "debug/Checkpoint.hh" +#include "dev/arm/generic_timer.hh" #include "sim/sim_object.hh" struct ArmISAParams; @@ -56,45 +58,174 @@ class EventManager; namespace ArmISA { + + /** + * At the moment there are 57 registers which need to be aliased/ + * translated with other registers in the ISA. This enum helps with that + * translation. + */ + enum translateTable { + miscRegTranslateCSSELR_EL1, + miscRegTranslateSCTLR_EL1, + miscRegTranslateSCTLR_EL2, + miscRegTranslateACTLR_EL1, + miscRegTranslateACTLR_EL2, + miscRegTranslateCPACR_EL1, + miscRegTranslateCPTR_EL2, + miscRegTranslateHCR_EL2, + miscRegTranslateMDCR_EL2, + miscRegTranslateHSTR_EL2, + miscRegTranslateHACR_EL2, + miscRegTranslateTTBR0_EL1, + miscRegTranslateTTBR1_EL1, + miscRegTranslateTTBR0_EL2, + miscRegTranslateVTTBR_EL2, + miscRegTranslateTCR_EL1, + miscRegTranslateTCR_EL2, + miscRegTranslateVTCR_EL2, + miscRegTranslateAFSR0_EL1, + miscRegTranslateAFSR1_EL1, + miscRegTranslateAFSR0_EL2, + miscRegTranslateAFSR1_EL2, + miscRegTranslateESR_EL2, + miscRegTranslateFAR_EL1, + miscRegTranslateFAR_EL2, + miscRegTranslateHPFAR_EL2, + miscRegTranslatePAR_EL1, + miscRegTranslateMAIR_EL1, + miscRegTranslateMAIR_EL2, + miscRegTranslateAMAIR_EL1, + miscRegTranslateVBAR_EL1, + miscRegTranslateVBAR_EL2, + miscRegTranslateCONTEXTIDR_EL1, + miscRegTranslateTPIDR_EL0, + miscRegTranslateTPIDRRO_EL0, + miscRegTranslateTPIDR_EL1, + miscRegTranslateTPIDR_EL2, + miscRegTranslateTEECR32_EL1, + miscRegTranslateCNTFRQ_EL0, + miscRegTranslateCNTPCT_EL0, + miscRegTranslateCNTVCT_EL0, + miscRegTranslateCNTVOFF_EL2, + miscRegTranslateCNTKCTL_EL1, + miscRegTranslateCNTHCTL_EL2, + miscRegTranslateCNTP_TVAL_EL0, + miscRegTranslateCNTP_CTL_EL0, + miscRegTranslateCNTP_CVAL_EL0, + miscRegTranslateCNTV_TVAL_EL0, + miscRegTranslateCNTV_CTL_EL0, + miscRegTranslateCNTV_CVAL_EL0, + miscRegTranslateCNTHP_TVAL_EL2, + miscRegTranslateCNTHP_CTL_EL2, + miscRegTranslateCNTHP_CVAL_EL2, + miscRegTranslateDACR32_EL2, + miscRegTranslateIFSR32_EL2, + miscRegTranslateTEEHBR32_EL1, + miscRegTranslateSDER32_EL3, + miscRegTranslateMax + }; + class ISA : public SimObject { protected: + // Parent system + ArmSystem *system; + + // Cached copies of system-level properties + bool haveSecurity; + bool haveLPAE; + bool haveVirtualization; + bool haveLargeAsid64; + uint8_t physAddrRange64; + + /** Register translation entry used in lookUpMiscReg */ + struct MiscRegLUTEntry { + uint32_t lower; + uint32_t upper; + }; + + struct MiscRegInitializerEntry { + uint32_t index; + struct MiscRegLUTEntry entry; + }; + + /** Register table noting all translations */ + static const struct MiscRegInitializerEntry + MiscRegSwitch[miscRegTranslateMax]; + + /** Translation table accessible via the value of the register */ + std::vector<struct MiscRegLUTEntry>
lookUpMiscReg; + MiscReg miscRegs[NumMiscRegs]; const IntRegIndex *intRegMap; void updateRegMap(CPSR cpsr) { - switch (cpsr.mode) { - case MODE_USER: - case MODE_SYSTEM: - intRegMap = IntRegUsrMap; - break; - case MODE_FIQ: - intRegMap = IntRegFiqMap; - break; - case MODE_IRQ: - intRegMap = IntRegIrqMap; - break; - case MODE_SVC: - intRegMap = IntRegSvcMap; - break; - case MODE_MON: - intRegMap = IntRegMonMap; - break; - case MODE_ABORT: - intRegMap = IntRegAbtMap; - break; - case MODE_UNDEFINED: - intRegMap = IntRegUndMap; - break; - default: - panic("Unrecognized mode setting in CPSR.\n"); + if (cpsr.width == 0) { + intRegMap = IntReg64Map; + } else { + switch (cpsr.mode) { + case MODE_USER: + case MODE_SYSTEM: + intRegMap = IntRegUsrMap; + break; + case MODE_FIQ: + intRegMap = IntRegFiqMap; + break; + case MODE_IRQ: + intRegMap = IntRegIrqMap; + break; + case MODE_SVC: + intRegMap = IntRegSvcMap; + break; + case MODE_MON: + intRegMap = IntRegMonMap; + break; + case MODE_ABORT: + intRegMap = IntRegAbtMap; + break; + case MODE_HYP: + intRegMap = IntRegHypMap; + break; + case MODE_UNDEFINED: + intRegMap = IntRegUndMap; + break; + default: + panic("Unrecognized mode setting in CPSR.\n"); + } } } + ::GenericTimer::SystemCounter * getSystemCounter(ThreadContext *tc); + ::GenericTimer::ArchTimer * getArchTimer(ThreadContext *tc, + int cpu_id); + + + private: + inline void assert32(ThreadContext *tc) { + CPSR cpsr M5_VAR_USED = readMiscReg(MISCREG_CPSR, tc); + assert(cpsr.width); + } + + inline void assert64(ThreadContext *tc) { + CPSR cpsr M5_VAR_USED = readMiscReg(MISCREG_CPSR, tc); + assert(!cpsr.width); + } + + void tlbiVA(ThreadContext *tc, MiscReg newVal, uint8_t asid, + bool secure_lookup, uint8_t target_el); + + void tlbiALL(ThreadContext *tc, bool secure_lookup, uint8_t target_el); + + void tlbiALLN(ThreadContext *tc, bool hyp, uint8_t target_el); + + void tlbiMVA(ThreadContext *tc, MiscReg newVal, bool secure_lookup, + bool hyp, uint8_t target_el); + public: void clear(); + void clear64(const ArmISAParams *p); MiscReg readMiscRegNoEffect(int misc_reg) const; MiscReg readMiscReg(int misc_reg, ThreadContext *tc); @@ -109,28 +240,28 @@ namespace ArmISA return intRegMap[reg]; } else if (reg < NUM_INTREGS) { return reg; - } else { - int mode = reg / intRegsPerMode; - reg = reg % intRegsPerMode; - switch (mode) { - case MODE_USER: - case MODE_SYSTEM: - return INTREG_USR(reg); - case MODE_FIQ: - return INTREG_FIQ(reg); - case MODE_IRQ: - return INTREG_IRQ(reg); - case MODE_SVC: - return INTREG_SVC(reg); - case MODE_MON: - return INTREG_MON(reg); - case MODE_ABORT: - return INTREG_ABT(reg); - case MODE_UNDEFINED: - return INTREG_UND(reg); + } else if (reg == INTREG_SPX) { + CPSR cpsr = miscRegs[MISCREG_CPSR]; + ExceptionLevel el = opModeToEL( + (OperatingMode) (uint8_t) cpsr.mode); + if (!cpsr.sp && el != EL0) + return INTREG_SP0; + switch (el) { + case EL3: + return INTREG_SP3; + // @todo: uncomment this to enable Virtualization + // case EL2: + // return INTREG_SP2; + case EL1: + return INTREG_SP1; + case EL0: + return INTREG_SP0; default: - panic("Flattening into an unknown mode.\n"); + panic("Invalid exception level"); + break; } + } else { + return flattenIntRegModeIndex(reg); } } @@ -150,47 +281,127 @@ namespace ArmISA int flattenMiscIndex(int reg) const { + int flat_idx = reg; + if (reg == MISCREG_SPSR) { - int spsr_idx = NUM_MISCREGS; CPSR cpsr = miscRegs[MISCREG_CPSR]; switch (cpsr.mode) { + case MODE_EL0T: + warn("User mode does not have SPSR\n"); + flat_idx = MISCREG_SPSR; + break; 
+ case MODE_EL1T: + case MODE_EL1H: + flat_idx = MISCREG_SPSR_EL1; + break; + case MODE_EL2T: + case MODE_EL2H: + flat_idx = MISCREG_SPSR_EL2; + break; + case MODE_EL3T: + case MODE_EL3H: + flat_idx = MISCREG_SPSR_EL3; + break; case MODE_USER: warn("User mode does not have SPSR\n"); - spsr_idx = MISCREG_SPSR; + flat_idx = MISCREG_SPSR; break; case MODE_FIQ: - spsr_idx = MISCREG_SPSR_FIQ; + flat_idx = MISCREG_SPSR_FIQ; break; case MODE_IRQ: - spsr_idx = MISCREG_SPSR_IRQ; + flat_idx = MISCREG_SPSR_IRQ; break; case MODE_SVC: - spsr_idx = MISCREG_SPSR_SVC; + flat_idx = MISCREG_SPSR_SVC; break; case MODE_MON: - spsr_idx = MISCREG_SPSR_MON; + flat_idx = MISCREG_SPSR_MON; break; case MODE_ABORT: - spsr_idx = MISCREG_SPSR_ABT; + flat_idx = MISCREG_SPSR_ABT; + break; + case MODE_HYP: + flat_idx = MISCREG_SPSR_HYP; break; case MODE_UNDEFINED: - spsr_idx = MISCREG_SPSR_UND; + flat_idx = MISCREG_SPSR_UND; break; default: warn("Trying to access SPSR in an invalid mode: %d\n", cpsr.mode); - spsr_idx = MISCREG_SPSR; + flat_idx = MISCREG_SPSR; break; } - return spsr_idx; + } else if (miscRegInfo[reg][MISCREG_MUTEX]) { + // Mutually exclusive CP15 register + switch (reg) { + case MISCREG_PRRR_MAIR0: + case MISCREG_PRRR_MAIR0_NS: + case MISCREG_PRRR_MAIR0_S: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + // If the muxed reg has been flattened, work out the + // offset and apply it to the unmuxed reg + int idxOffset = reg - MISCREG_PRRR_MAIR0; + if (ttbcr.eae) + flat_idx = flattenMiscIndex(MISCREG_MAIR0 + + idxOffset); + else + flat_idx = flattenMiscIndex(MISCREG_PRRR + + idxOffset); + } + break; + case MISCREG_NMRR_MAIR1: + case MISCREG_NMRR_MAIR1_NS: + case MISCREG_NMRR_MAIR1_S: + { + TTBCR ttbcr = readMiscRegNoEffect(MISCREG_TTBCR); + // If the muxed reg has been flattened, work out the + // offset and apply it to the unmuxed reg + int idxOffset = reg - MISCREG_NMRR_MAIR1; + if (ttbcr.eae) + flat_idx = flattenMiscIndex(MISCREG_MAIR1 + + idxOffset); + else + flat_idx = flattenMiscIndex(MISCREG_NMRR + + idxOffset); + } + break; + case MISCREG_PMXEVTYPER_PMCCFILTR: + { + PMSELR pmselr = miscRegs[MISCREG_PMSELR]; + if (pmselr.sel == 31) + flat_idx = flattenMiscIndex(MISCREG_PMCCFILTR); + else + flat_idx = flattenMiscIndex(MISCREG_PMXEVTYPER); + } + break; + default: + panic("Unrecognized misc. register.\n"); + break; + } + } else { + if (miscRegInfo[reg][MISCREG_BANKED]) { + bool secureReg = haveSecurity && + inSecureState(miscRegs[MISCREG_SCR], + miscRegs[MISCREG_CPSR]); + flat_idx += secureReg ? 
2 : 1; + } } - return reg; + return flat_idx; } void serialize(std::ostream &os) { DPRINTF(Checkpoint, "Serializing Arm Misc Registers\n"); SERIALIZE_ARRAY(miscRegs, NumMiscRegs); + + SERIALIZE_SCALAR(haveSecurity); + SERIALIZE_SCALAR(haveLPAE); + SERIALIZE_SCALAR(haveVirtualization); + SERIALIZE_SCALAR(haveLargeAsid64); + SERIALIZE_SCALAR(physAddrRange64); } void unserialize(Checkpoint *cp, const std::string &section) { @@ -198,6 +409,12 @@ namespace ArmISA UNSERIALIZE_ARRAY(miscRegs, NumMiscRegs); CPSR tmp_cpsr = miscRegs[MISCREG_CPSR]; updateRegMap(tmp_cpsr); + + UNSERIALIZE_SCALAR(haveSecurity); + UNSERIALIZE_SCALAR(haveLPAE); + UNSERIALIZE_SCALAR(haveVirtualization); + UNSERIALIZE_SCALAR(haveLargeAsid64); + UNSERIALIZE_SCALAR(physAddrRange64); } void startup(ThreadContext *tc) {} diff --git a/src/arch/arm/isa/bitfields.isa b/src/arch/arm/isa/bitfields.isa index 5a8b5db6d..6006cfb2d 100644 --- a/src/arch/arm/isa/bitfields.isa +++ b/src/arch/arm/isa/bitfields.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -73,6 +73,7 @@ def bitfield SEVEN_AND_FOUR sevenAndFour; def bitfield THUMB thumb; def bitfield BIGTHUMB bigThumb; +def bitfield AARCH64 aarch64; // Other def bitfield COND_CODE condCode; diff --git a/src/arch/arm/isa/decoder/aarch64.isa b/src/arch/arm/isa/decoder/aarch64.isa new file mode 100644 index 000000000..a6c0fa2df --- /dev/null +++ b/src/arch/arm/isa/decoder/aarch64.isa @@ -0,0 +1,48 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +//////////////////////////////////////////////////////////////////// +// +// The 64 bit ARM decoder +// -------------------------- +// + + +Aarch64::aarch64(); + diff --git a/src/arch/arm/isa/decoder/arm.isa b/src/arch/arm/isa/decoder/arm.isa index 4bd9d5cf4..f0c0dec18 100644 --- a/src/arch/arm/isa/decoder/arm.isa +++ b/src/arch/arm/isa/decoder/arm.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -73,7 +73,11 @@ format DataOp { 0x9: ArmBlxReg::armBlxReg(); } 0x5: ArmSatAddSub::armSatAddSub(); - 0x7: Breakpoint::bkpt(); + 0x6: ArmERet::armERet(); + 0x7: decode OPCODE_22 { + 0: Breakpoint::bkpt(); + 1: ArmSmcHyp::armSmcHyp(); + } } 0x1: ArmHalfWordMultAndMultAcc::armHalfWordMultAndMultAcc(); } @@ -105,6 +109,10 @@ format DataOp { } 0x6: decode CPNUM { 0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStore(); + 0xf: decode OPCODE_20 { + 0: Mcrr15::Mcrr15(); + 1: Mrrc15::Mrrc15(); + } } 0x7: decode OPCODE_24 { 0: decode OPCODE_4 { diff --git a/src/arch/arm/isa/decoder/decoder.isa b/src/arch/arm/isa/decoder/decoder.isa index cf7d17871..94685b943 100644 --- a/src/arch/arm/isa/decoder/decoder.isa +++ b/src/arch/arm/isa/decoder/decoder.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -41,8 +41,12 @@ // Authors: Gabe Black decode THUMB default Unknown::unknown() { -0: -##include "arm.isa" +0: decode AARCH64 { + 0: + ##include "arm.isa" + 1: + ##include "aarch64.isa" +} 1: ##include "thumb.isa" } diff --git a/src/arch/arm/isa/decoder/thumb.isa b/src/arch/arm/isa/decoder/thumb.isa index f54cc728d..31495793e 100644 --- a/src/arch/arm/isa/decoder/thumb.isa +++ b/src/arch/arm/isa/decoder/thumb.isa @@ -95,8 +95,14 @@ decode BIGTHUMB { 0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStre(); 0xf: decode HTOPCODE_9_4 { 0x00: Unknown::undefined(); - 0x04: WarnUnimpl::mcrr(); // mcrr2 - 0x05: WarnUnimpl::mrrc(); // mrrc2 + 0x04: decode LTCOPROC { + 0xf: Mcrr15::Mcrr15(); + default: WarnUnimpl::mcrr(); // mcrr2 + } + 0x05: decode LTCOPROC { + 0xf: Mrrc15::Mrrc15(); + default: WarnUnimpl::mrrc(); // mrrc2 + } 0x02, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e: WarnUnimpl::stc(); // stc2 diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa new file mode 100644 index 000000000..3ed70ce81 --- /dev/null +++ b/src/arch/arm/isa/formats/aarch64.isa @@ -0,0 +1,2035 @@ +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a 
hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
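The group decoders declared in this new file correspond to the architectural top-level split of the A64 encoding space on instruction bits 28:25. The actual dispatch is generated from the .isa description rather than written out by hand; the sketch below is illustrative only:

    // Illustrative sketch of the A64 top-level decode on bits 28:25
    // (the generated decoder performs this dispatch).
    StaticInstPtr
    decodeA64Sketch(ExtMachInst machInst)
    {
        switch (bits(machInst, 28, 25)) {
          case 0x8: case 0x9:                      // 100x: data proc, immediate
            return Aarch64::decodeDataProcImm(machInst);
          case 0xa: case 0xb:                      // 101x: branch/exc/system
            return Aarch64::decodeBranchExcSys(machInst);
          case 0x4: case 0x6: case 0xc: case 0xe:  // x1x0: loads and stores
            return Aarch64::decodeLoadsStores(machInst);
          case 0x5: case 0xd:                      // x101: data proc, register
            return Aarch64::decodeDataProcReg(machInst);
          case 0x7: case 0xf:                      // x111: FP and SIMD
            return Aarch64::decodeFpAdvSIMD(machInst);
          default:
            return new Unknown64(machInst);
        }
    }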
+// +// Authors: Gabe Black +// Thomas Grocutt +// Mbou Eyole +// Giacomo Gabrielli + +output header {{ +namespace Aarch64 +{ + StaticInstPtr decodeDataProcImm(ExtMachInst machInst); + StaticInstPtr decodeBranchExcSys(ExtMachInst machInst); + StaticInstPtr decodeLoadsStores(ExtMachInst machInst); + StaticInstPtr decodeDataProcReg(ExtMachInst machInst); + + StaticInstPtr decodeFpAdvSIMD(ExtMachInst machInst); + StaticInstPtr decodeFp(ExtMachInst machInst); + StaticInstPtr decodeAdvSIMD(ExtMachInst machInst); + StaticInstPtr decodeAdvSIMDScalar(ExtMachInst machInst); + + StaticInstPtr decodeGem5Ops(ExtMachInst machInst); +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeDataProcImm(ExtMachInst machInst) + { + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rdsp = makeSP(rd); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + + uint8_t opc = bits(machInst, 30, 29); + bool sf = bits(machInst, 31); + bool n = bits(machInst, 22); + uint8_t immr = bits(machInst, 21, 16); + uint8_t imms = bits(machInst, 15, 10); + switch (bits(machInst, 25, 23)) { + case 0x0: + case 0x1: + { + uint64_t immlo = bits(machInst, 30, 29); + uint64_t immhi = bits(machInst, 23, 5); + uint64_t imm = (immlo << 0) | (immhi << 2); + if (bits(machInst, 31) == 0) + return new AdrXImm(machInst, rd, INTREG_ZERO, sext<21>(imm)); + else + return new AdrpXImm(machInst, rd, INTREG_ZERO, + sext<33>(imm << 12)); + } + case 0x2: + case 0x3: + { + uint32_t imm12 = bits(machInst, 21, 10); + uint8_t shift = bits(machInst, 23, 22); + uint32_t imm; + if (shift == 0x0) + imm = imm12 << 0; + else if (shift == 0x1) + imm = imm12 << 12; + else + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new AddXImm(machInst, rdsp, rnsp, imm); + case 0x1: + return new AddXImmCc(machInst, rd, rnsp, imm); + case 0x2: + return new SubXImm(machInst, rdsp, rnsp, imm); + case 0x3: + return new SubXImmCc(machInst, rd, rnsp, imm); + } + } + case 0x4: + { + if (!sf && n) + return new Unknown64(machInst); + // len = MSB(n:NOT(imms)), len < 1 is undefined. + uint8_t len = 0; + if (n) { + len = 6; + } else if (imms == 0x3f || imms == 0x3e) { + return new Unknown64(machInst); + } else { + len = findMsbSet(imms ^ 0x3f); + } + // Generate r, s, and size. + uint64_t r = bits(immr, len - 1, 0); + uint64_t s = bits(imms, len - 1, 0); + uint8_t size = 1 << len; + if (s == size - 1) + return new Unknown64(machInst); + // Generate the pattern with s 1s, rotated by r, with size bits. + uint64_t pattern = mask(s + 1); + if (r) { + pattern = (pattern >> r) | (pattern << (size - r)); + pattern &= mask(size); + } + uint8_t width = sf ? 64 : 32; + // Replicate that to fill up the immediate. 
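+ // Worked example (illustrative): sf=1, n=0, immr=0, imms=0b111100
+ // gives len=1, size=2, s=0, r=0, so pattern=0b01 and replication
+ // yields imm = 0x5555555555555555; with immr=1 the 2-bit element
+ // rotates to 0b10, giving 0xaaaaaaaaaaaaaaaa instead.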
+ for (unsigned i = 1; i < (width / size); i *= 2) + pattern |= (pattern << (i * size)); + uint64_t imm = pattern; + + switch (opc) { + case 0x0: + return new AndXImm(machInst, rdsp, rn, imm); + case 0x1: + return new OrrXImm(machInst, rdsp, rn, imm); + case 0x2: + return new EorXImm(machInst, rdsp, rn, imm); + case 0x3: + return new AndXImmCc(machInst, rd, rn, imm); + } + } + case 0x5: + { + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + uint32_t imm16 = bits(machInst, 20, 5); + uint32_t hw = bits(machInst, 22, 21); + switch (opc) { + case 0x0: + return new Movn(machInst, rd, imm16, hw * 16); + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new Movz(machInst, rd, imm16, hw * 16); + case 0x3: + return new Movk(machInst, rd, imm16, hw * 16); + } + } + case 0x6: + if ((sf != n) || (!sf && (bits(immr, 5) || bits(imms, 5)))) + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new Sbfm64(machInst, rd, rn, immr, imms); + case 0x1: + return new Bfm64(machInst, rd, rn, immr, imms); + case 0x2: + return new Ubfm64(machInst, rd, rn, immr, imms); + case 0x3: + return new Unknown64(machInst); + } + case 0x7: + { + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + if (opc || bits(machInst, 21)) + return new Unknown64(machInst); + else + return new Extr64(machInst, rd, rn, rm, imms); + } + } + return new FailUnimplemented("Unhandled Case8", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeBranchExcSys(ExtMachInst machInst) + { + switch (bits(machInst, 30, 29)) { + case 0x0: + { + int64_t imm = sext<26>(bits(machInst, 25, 0)) << 2; + if (bits(machInst, 31) == 0) + return new B64(machInst, imm); + else + return new Bl64(machInst, imm); + } + case 0x1: + { + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + if (bits(machInst, 25) == 0) { + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + if (bits(machInst, 24) == 0) + return new Cbz64(machInst, imm, rt); + else + return new Cbnz64(machInst, imm, rt); + } else { + uint64_t bitmask = 0x1; + bitmask <<= bits(machInst, 23, 19); + int64_t imm = sext<14>(bits(machInst, 18, 5)) << 2; + if (bits(machInst, 31)) + bitmask <<= 32; + if (bits(machInst, 24) == 0) + return new Tbz64(machInst, bitmask, imm, rt); + else + return new Tbnz64(machInst, bitmask, imm, rt); + } + } + case 0x2: + // bit 30:26=10101 + if (bits(machInst, 31) == 0) { + if (bits(machInst, 25, 24) || bits(machInst, 4)) + return new Unknown64(machInst); + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + ConditionCode condCode = + (ConditionCode)(uint8_t)(bits(machInst, 3, 0)); + return new BCond64(machInst, imm, condCode); + } else if (bits(machInst, 25, 24) == 0x0) { + if (bits(machInst, 4, 2)) + return new Unknown64(machInst); + uint8_t decVal = (bits(machInst, 1, 0) << 0) | + (bits(machInst, 23, 21) << 2); + switch (decVal) { + case 0x01: + return new Svc64(machInst); + case 0x02: + return new FailUnimplemented("hvc", machInst); + case 0x03: + return new Smc64(machInst); + case 0x04: + return new FailUnimplemented("brk", machInst); + case 0x08: + return new FailUnimplemented("hlt", machInst); + case 0x15: + return new FailUnimplemented("dcps1", machInst); + case 0x16: + return new FailUnimplemented("dcps2", machInst); + case 0x17: + return new FailUnimplemented("dcps3", machInst); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 25, 22) == 0x4) { + // bit 31:22=1101010100 + bool l = bits(machInst, 21); + uint8_t op0 = bits(machInst, 20, 
19); + uint8_t op1 = bits(machInst, 18, 16); + uint8_t crn = bits(machInst, 15, 12); + uint8_t crm = bits(machInst, 11, 8); + uint8_t op2 = bits(machInst, 7, 5); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + switch (op0) { + case 0x0: + if (rt != 0x1f || l) + return new Unknown64(machInst); + if (crn == 0x2 && op1 == 0x3) { + switch (op2) { + case 0x0: + return new NopInst(machInst); + case 0x1: + return new YieldInst(machInst); + case 0x2: + return new WfeInst(machInst); + case 0x3: + return new WfiInst(machInst); + case 0x4: + return new SevInst(machInst); + case 0x5: + return new SevlInst(machInst); + default: + return new Unknown64(machInst); + } + } else if (crn == 0x3 && op1 == 0x3) { + switch (op2) { + case 0x2: + return new Clrex64(machInst); + case 0x4: + return new Dsb64(machInst); + case 0x5: + return new Dmb64(machInst); + case 0x6: + return new Isb64(machInst); + default: + return new Unknown64(machInst); + } + } else if (crn == 0x4) { + // MSR immediate + switch (op1 << 3 | op2) { + case 0x5: + // SP + return new MsrSP64(machInst, + (IntRegIndex) MISCREG_SPSEL, + INTREG_ZERO, + crm & 0x1); + case 0x1e: + // DAIFSet + return new MsrDAIFSet64( + machInst, + (IntRegIndex) MISCREG_DAIF, + INTREG_ZERO, + crm); + case 0x1f: + // DAIFClr + return new MsrDAIFClr64( + machInst, + (IntRegIndex) MISCREG_DAIF, + INTREG_ZERO, + crm); + default: + return new Unknown64(machInst); + } + } else { + return new Unknown64(machInst); + } + break; + case 0x1: + case 0x2: + case 0x3: + { + // bit 31:22=1101010100, 20:19=11 + bool read = l; + MiscRegIndex miscReg = + decodeAArch64SysReg(op0, op1, crn, crm, op2); + if (read) { + if ((miscReg == MISCREG_DC_CIVAC_Xt) || + (miscReg == MISCREG_DC_CVAC_Xt) || + (miscReg == MISCREG_DC_ZVA_Xt)) { + return new Unknown64(machInst); + } + } + // Check for invalid registers + if (miscReg == MISCREG_UNKNOWN) { + return new Unknown64(machInst); + } else if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + if (miscReg == MISCREG_NZCV) { + if (read) + return new MrsNZCV64(machInst, rt, (IntRegIndex) miscReg); + else + return new MsrNZCV64(machInst, (IntRegIndex) miscReg, rt); + } + uint32_t iss = msrMrs64IssBuild(read, op0, op1, crn, crm, op2, rt); + if (miscReg == MISCREG_DC_ZVA_Xt && !read) + return new Dczva(machInst, rt, (IntRegIndex) miscReg, iss); + + if (read) + return new Mrs64(machInst, rt, (IntRegIndex) miscReg, iss); + else + return new Msr64(machInst, (IntRegIndex) miscReg, rt, iss); + } else if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + read ? "mrs" : "msr", miscRegName[miscReg]); + return new WarnUnimplemented(read ? "mrs" : "msr", + machInst, full_mnem); + } else { + return new FailUnimplemented(csprintf("%s %s", + read ? 
"mrs" : "msr", miscRegName[miscReg]).c_str(), + machInst); + } + } + break; + } + } else if (bits(machInst, 25) == 0x1) { + uint8_t opc = bits(machInst, 24, 21); + uint8_t op2 = bits(machInst, 20, 16); + uint8_t op3 = bits(machInst, 15, 10); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + uint8_t op4 = bits(machInst, 4, 0); + if (op2 != 0x1f || op3 != 0x0 || op4 != 0x0) + return new Unknown64(machInst); + switch (opc) { + case 0x0: + return new Br64(machInst, rn); + case 0x1: + return new Blr64(machInst, rn); + case 0x2: + return new Ret64(machInst, rn); + case 0x4: + if (rn != 0x1f) + return new Unknown64(machInst); + return new Eret64(machInst); + case 0x5: + if (rn != 0x1f) + return new Unknown64(machInst); + return new FailUnimplemented("dret", machInst); + } + } + default: + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case7", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeLoadsStores(ExtMachInst machInst) + { + // bit 27,25=10 + switch (bits(machInst, 29, 28)) { + case 0x0: + if (bits(machInst, 26) == 0) { + if (bits(machInst, 24) != 0) + return new Unknown64(machInst); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rt2 = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + IntRegIndex rs = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + uint8_t opc = (bits(machInst, 15) << 0) | + (bits(machInst, 23, 21) << 1); + uint8_t size = bits(machInst, 31, 30); + switch (opc) { + case 0x0: + switch (size) { + case 0x0: + return new STXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new STXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new STXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new STXRX64(machInst, rt, rnsp, rs); + } + case 0x1: + switch (size) { + case 0x0: + return new STLXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new STLXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new STLXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new STLXRX64(machInst, rt, rnsp, rs); + } + case 0x2: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new STXPW64(machInst, rs, rt, rt2, rnsp); + case 0x3: + return new STXPX64(machInst, rs, rt, rt2, rnsp); + } + + case 0x3: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new STLXPW64(machInst, rs, rt, rt2, rnsp); + case 0x3: + return new STLXPX64(machInst, rs, rt, rt2, rnsp); + } + + case 0x4: + switch (size) { + case 0x0: + return new LDXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new LDXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new LDXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new LDXRX64(machInst, rt, rnsp, rs); + } + case 0x5: + switch (size) { + case 0x0: + return new LDAXRB64(machInst, rt, rnsp, rs); + case 0x1: + return new LDAXRH64(machInst, rt, rnsp, rs); + case 0x2: + return new LDAXRW64(machInst, rt, rnsp, rs); + case 0x3: + return new LDAXRX64(machInst, rt, rnsp, rs); + } + case 0x6: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new LDXPW64(machInst, rt, rt2, rnsp); + case 0x3: + return new LDXPX64(machInst, rt, rt2, rnsp); + } + + case 0x7: + switch (size) { + case 0x0: + case 0x1: + return new Unknown64(machInst); + case 0x2: + return new LDAXPW64(machInst, rt, rt2, rnsp); + case 0x3: + return new LDAXPX64(machInst, rt, rt2, rnsp); + } + + 
case 0x9: + switch (size) { + case 0x0: + return new STLRB64(machInst, rt, rnsp); + case 0x1: + return new STLRH64(machInst, rt, rnsp); + case 0x2: + return new STLRW64(machInst, rt, rnsp); + case 0x3: + return new STLRX64(machInst, rt, rnsp); + } + case 0xd: + switch (size) { + case 0x0: + return new LDARB64(machInst, rt, rnsp); + case 0x1: + return new LDARH64(machInst, rt, rnsp); + case 0x2: + return new LDARW64(machInst, rt, rnsp); + case 0x3: + return new LDARX64(machInst, rt, rnsp); + } + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 31)) { + return new Unknown64(machInst); + } else { + return decodeNeonMem(machInst); + } + case 0x1: + { + if (bits(machInst, 24) != 0) + return new Unknown64(machInst); + uint8_t switchVal = (bits(machInst, 26) << 0) | + (bits(machInst, 31, 30) << 1); + int64_t imm = sext<19>(bits(machInst, 23, 5)) << 2; + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + switch (switchVal) { + case 0x0: + return new LDRWL64_LIT(machInst, rt, imm); + case 0x1: + return new LDRSFP64_LIT(machInst, rt, imm); + case 0x2: + return new LDRXL64_LIT(machInst, rt, imm); + case 0x3: + return new LDRDFP64_LIT(machInst, rt, imm); + case 0x4: + return new LDRSWL64_LIT(machInst, rt, imm); + case 0x5: + return new BigFpMemLit("ldr", machInst, rt, imm); + case 0x6: + return new PRFM64_LIT(machInst, rt, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + uint8_t opc = bits(machInst, 31, 30); + if (opc >= 3) + return new Unknown64(machInst); + uint32_t size = 0; + bool fp = bits(machInst, 26); + bool load = bits(machInst, 22); + if (fp) { + size = 4 << opc; + } else { + if ((opc == 1) && !load) + return new Unknown64(machInst); + size = (opc == 0 || opc == 1) ? 4 : 8; + } + uint8_t type = bits(machInst, 24, 23); + int64_t imm = sext<7>(bits(machInst, 21, 15)) * size; + + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rt = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rt2 = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + + bool noAlloc = (type == 0); + bool signExt = !noAlloc && !fp && opc == 1; + PairMemOp::AddrMode mode; + const char *mnemonic = NULL; + switch (type) { + case 0x0: + case 0x2: + mode = PairMemOp::AddrMd_Offset; + break; + case 0x1: + mode = PairMemOp::AddrMd_PostIndex; + break; + case 0x3: + mode = PairMemOp::AddrMd_PreIndex; + break; + default: + return new Unknown64(machInst); + } + if (load) { + if (noAlloc) + mnemonic = "ldnp"; + else if (signExt) + mnemonic = "ldpsw"; + else + mnemonic = "ldp"; + } else { + if (noAlloc) + mnemonic = "stnp"; + else + mnemonic = "stp"; + } + + return new LdpStp(mnemonic, machInst, size, fp, load, noAlloc, + signExt, false, false, imm, mode, rn, rt, rt2); + } + // bit 29:27=111, 25=0 + case 0x3: + { + uint8_t switchVal = (bits(machInst, 23, 22) << 0) | + (bits(machInst, 26) << 2) | + (bits(machInst, 31, 30) << 3); + if (bits(machInst, 24) == 1) { + uint64_t imm12 = bits(machInst, 21, 10); + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + switch (switchVal) { + case 0x00: + return new STRB64_IMM(machInst, rt, rnsp, imm12); + case 0x01: + return new LDRB64_IMM(machInst, rt, rnsp, imm12); + case 0x02: + return new LDRSBX64_IMM(machInst, rt, rnsp, imm12); + case 0x03: + return new LDRSBW64_IMM(machInst, rt, rnsp, imm12); + case 0x04: + return new STRBFP64_IMM(machInst, rt, rnsp, imm12); + case 0x05: + return new 
LDRBFP64_IMM(machInst, rt, rnsp, imm12); + case 0x06: + return new BigFpMemImm("str", machInst, false, + rt, rnsp, imm12 << 4); + case 0x07: + return new BigFpMemImm("ldr", machInst, true, + rt, rnsp, imm12 << 4); + case 0x08: + return new STRH64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x09: + return new LDRH64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0a: + return new LDRSHX64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0b: + return new LDRSHW64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0c: + return new STRHFP64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x0d: + return new LDRHFP64_IMM(machInst, rt, rnsp, imm12 << 1); + case 0x10: + return new STRW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x11: + return new LDRW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x12: + return new LDRSW64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x14: + return new STRSFP64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x15: + return new LDRSFP64_IMM(machInst, rt, rnsp, imm12 << 2); + case 0x18: + return new STRX64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x19: + return new LDRX64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1a: + return new PRFM64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1c: + return new STRDFP64_IMM(machInst, rt, rnsp, imm12 << 3); + case 0x1d: + return new LDRDFP64_IMM(machInst, rt, rnsp, imm12 << 3); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 11, 10) != 0x2) + return new Unknown64(machInst); + if (!bits(machInst, 14)) + return new Unknown64(machInst); + IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + ArmExtendType type = + (ArmExtendType)(uint32_t)bits(machInst, 15, 13); + uint8_t s = bits(machInst, 12); + switch (switchVal) { + case 0x00: + return new STRB64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x01: + return new LDRB64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x02: + return new LDRSBX64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x03: + return new LDRSBW64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x04: + return new STRBFP64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x05: + return new LDRBFP64_REG(machInst, rt, rnsp, rm, type, 0); + case 0x6: + return new BigFpMemReg("str", machInst, false, + rt, rnsp, rm, type, s * 4); + case 0x7: + return new BigFpMemReg("ldr", machInst, true, + rt, rnsp, rm, type, s * 4); + case 0x08: + return new STRH64_REG(machInst, rt, rnsp, rm, type, s); + case 0x09: + return new LDRH64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0a: + return new LDRSHX64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0b: + return new LDRSHW64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0c: + return new STRHFP64_REG(machInst, rt, rnsp, rm, type, s); + case 0x0d: + return new LDRHFP64_REG(machInst, rt, rnsp, rm, type, s); + case 0x10: + return new STRW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x11: + return new LDRW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x12: + return new LDRSW64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x14: + return new STRSFP64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x15: + return new LDRSFP64_REG(machInst, rt, rnsp, rm, type, s * 2); + case 0x18: + return new STRX64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x19: + return new LDRX64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1a: + return new 
PRFM64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1c: + return new STRDFP64_REG(machInst, rt, rnsp, rm, type, s * 3); + case 0x1d: + return new LDRDFP64_REG(machInst, rt, rnsp, rm, type, s * 3); + default: + return new Unknown64(machInst); + } + } else { + // bit 29:27=111, 25:24=00, 21=0 + switch (bits(machInst, 11, 10)) { + case 0x0: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STURB64_IMM(machInst, rt, rnsp, imm); + case 0x01: + return new LDURB64_IMM(machInst, rt, rnsp, imm); + case 0x02: + return new LDURSBX64_IMM(machInst, rt, rnsp, imm); + case 0x03: + return new LDURSBW64_IMM(machInst, rt, rnsp, imm); + case 0x04: + return new STURBFP64_IMM(machInst, rt, rnsp, imm); + case 0x05: + return new LDURBFP64_IMM(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemImm("stur", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemImm("ldur", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STURH64_IMM(machInst, rt, rnsp, imm); + case 0x09: + return new LDURH64_IMM(machInst, rt, rnsp, imm); + case 0x0a: + return new LDURSHX64_IMM(machInst, rt, rnsp, imm); + case 0x0b: + return new LDURSHW64_IMM(machInst, rt, rnsp, imm); + case 0x0c: + return new STURHFP64_IMM(machInst, rt, rnsp, imm); + case 0x0d: + return new LDURHFP64_IMM(machInst, rt, rnsp, imm); + case 0x10: + return new STURW64_IMM(machInst, rt, rnsp, imm); + case 0x11: + return new LDURW64_IMM(machInst, rt, rnsp, imm); + case 0x12: + return new LDURSW64_IMM(machInst, rt, rnsp, imm); + case 0x14: + return new STURSFP64_IMM(machInst, rt, rnsp, imm); + case 0x15: + return new LDURSFP64_IMM(machInst, rt, rnsp, imm); + case 0x18: + return new STURX64_IMM(machInst, rt, rnsp, imm); + case 0x19: + return new LDURX64_IMM(machInst, rt, rnsp, imm); + case 0x1a: + return new PRFUM64_IMM(machInst, rt, rnsp, imm); + case 0x1c: + return new STURDFP64_IMM(machInst, rt, rnsp, imm); + case 0x1d: + return new LDURDFP64_IMM(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + // bit 29:27=111, 25:24=00, 21=0, 11:10=01 + case 0x1: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STRB64_POST(machInst, rt, rnsp, imm); + case 0x01: + return new LDRB64_POST(machInst, rt, rnsp, imm); + case 0x02: + return new LDRSBX64_POST(machInst, rt, rnsp, imm); + case 0x03: + return new LDRSBW64_POST(machInst, rt, rnsp, imm); + case 0x04: + return new STRBFP64_POST(machInst, rt, rnsp, imm); + case 0x05: + return new LDRBFP64_POST(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemPost("str", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemPost("ldr", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STRH64_POST(machInst, rt, rnsp, imm); + case 0x09: + return new LDRH64_POST(machInst, rt, rnsp, imm); + case 0x0a: + return new LDRSHX64_POST(machInst, rt, rnsp, imm); + case 0x0b: + return new LDRSHW64_POST(machInst, rt, rnsp, imm); + case 0x0c: + return new STRHFP64_POST(machInst, rt, rnsp, imm); + case 0x0d: + return new LDRHFP64_POST(machInst, rt, rnsp, imm); + case 0x10: + return new STRW64_POST(machInst, rt, rnsp, imm); + case 
0x11: + return new LDRW64_POST(machInst, rt, rnsp, imm); + case 0x12: + return new LDRSW64_POST(machInst, rt, rnsp, imm); + case 0x14: + return new STRSFP64_POST(machInst, rt, rnsp, imm); + case 0x15: + return new LDRSFP64_POST(machInst, rt, rnsp, imm); + case 0x18: + return new STRX64_POST(machInst, rt, rnsp, imm); + case 0x19: + return new LDRX64_POST(machInst, rt, rnsp, imm); + case 0x1c: + return new STRDFP64_POST(machInst, rt, rnsp, imm); + case 0x1d: + return new LDRDFP64_POST(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STTRB64_IMM(machInst, rt, rnsp, imm); + case 0x01: + return new LDTRB64_IMM(machInst, rt, rnsp, imm); + case 0x02: + return new LDTRSBX64_IMM(machInst, rt, rnsp, imm); + case 0x03: + return new LDTRSBW64_IMM(machInst, rt, rnsp, imm); + case 0x08: + return new STTRH64_IMM(machInst, rt, rnsp, imm); + case 0x09: + return new LDTRH64_IMM(machInst, rt, rnsp, imm); + case 0x0a: + return new LDTRSHX64_IMM(machInst, rt, rnsp, imm); + case 0x0b: + return new LDTRSHW64_IMM(machInst, rt, rnsp, imm); + case 0x10: + return new STTRW64_IMM(machInst, rt, rnsp, imm); + case 0x11: + return new LDTRW64_IMM(machInst, rt, rnsp, imm); + case 0x12: + return new LDTRSW64_IMM(machInst, rt, rnsp, imm); + case 0x18: + return new STTRX64_IMM(machInst, rt, rnsp, imm); + case 0x19: + return new LDTRX64_IMM(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + case 0x3: + { + IntRegIndex rt = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + uint64_t imm = sext<9>(bits(machInst, 20, 12)); + switch (switchVal) { + case 0x00: + return new STRB64_PRE(machInst, rt, rnsp, imm); + case 0x01: + return new LDRB64_PRE(machInst, rt, rnsp, imm); + case 0x02: + return new LDRSBX64_PRE(machInst, rt, rnsp, imm); + case 0x03: + return new LDRSBW64_PRE(machInst, rt, rnsp, imm); + case 0x04: + return new STRBFP64_PRE(machInst, rt, rnsp, imm); + case 0x05: + return new LDRBFP64_PRE(machInst, rt, rnsp, imm); + case 0x06: + return new BigFpMemPre("str", machInst, false, + rt, rnsp, imm); + case 0x07: + return new BigFpMemPre("ldr", machInst, true, + rt, rnsp, imm); + case 0x08: + return new STRH64_PRE(machInst, rt, rnsp, imm); + case 0x09: + return new LDRH64_PRE(machInst, rt, rnsp, imm); + case 0x0a: + return new LDRSHX64_PRE(machInst, rt, rnsp, imm); + case 0x0b: + return new LDRSHW64_PRE(machInst, rt, rnsp, imm); + case 0x0c: + return new STRHFP64_PRE(machInst, rt, rnsp, imm); + case 0x0d: + return new LDRHFP64_PRE(machInst, rt, rnsp, imm); + case 0x10: + return new STRW64_PRE(machInst, rt, rnsp, imm); + case 0x11: + return new LDRW64_PRE(machInst, rt, rnsp, imm); + case 0x12: + return new LDRSW64_PRE(machInst, rt, rnsp, imm); + case 0x14: + return new STRSFP64_PRE(machInst, rt, rnsp, imm); + case 0x15: + return new LDRSFP64_PRE(machInst, rt, rnsp, imm); + case 0x18: + return new STRX64_PRE(machInst, rt, rnsp, imm); + case 0x19: + return new LDRX64_PRE(machInst, rt, rnsp, imm); + case 0x1c: + return new STRDFP64_PRE(machInst, rt, rnsp, imm); + case 0x1d: + return new LDRDFP64_PRE(machInst, rt, rnsp, imm); + default: + return new Unknown64(machInst); + } + } + } + } + } + } + return 
new FailUnimplemented("Unhandled Case1", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeDataProcReg(ExtMachInst machInst) + { + uint8_t switchVal = (bits(machInst, 28) << 1) | + (bits(machInst, 24) << 0); + switch (switchVal) { + case 0x0: + { + uint8_t switchVal = (bits(machInst, 21) << 0) | + (bits(machInst, 30, 29) << 1); + ArmShiftType type = (ArmShiftType)(uint8_t)bits(machInst, 23, 22); + uint8_t imm6 = bits(machInst, 15, 10); + bool sf = bits(machInst, 31); + if (!sf && (imm6 & 0x20)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + + switch (switchVal) { + case 0x0: + return new AndXSReg(machInst, rd, rn, rm, imm6, type); + case 0x1: + return new BicXSReg(machInst, rd, rn, rm, imm6, type); + case 0x2: + return new OrrXSReg(machInst, rd, rn, rm, imm6, type); + case 0x3: + return new OrnXSReg(machInst, rd, rn, rm, imm6, type); + case 0x4: + return new EorXSReg(machInst, rd, rn, rm, imm6, type); + case 0x5: + return new EonXSReg(machInst, rd, rn, rm, imm6, type); + case 0x6: + return new AndXSRegCc(machInst, rd, rn, rm, imm6, type); + case 0x7: + return new BicXSRegCc(machInst, rd, rn, rm, imm6, type); + } + } + case 0x1: + { + uint8_t switchVal = bits(machInst, 30, 29); + if (bits(machInst, 21) == 0) { + ArmShiftType type = + (ArmShiftType)(uint8_t)bits(machInst, 23, 22); + if (type == ROR) + return new Unknown64(machInst); + uint8_t imm6 = bits(machInst, 15, 10); + if (!bits(machInst, 31) && bits(imm6, 5)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (switchVal) { + case 0x0: + return new AddXSReg(machInst, rd, rn, rm, imm6, type); + case 0x1: + return new AddXSRegCc(machInst, rd, rn, rm, imm6, type); + case 0x2: + return new SubXSReg(machInst, rd, rn, rm, imm6, type); + case 0x3: + return new SubXSRegCc(machInst, rd, rn, rm, imm6, type); + } + } else { + if (bits(machInst, 23, 22) != 0 || bits(machInst, 12, 10) > 0x4) + return new Unknown64(machInst); + ArmExtendType type = + (ArmExtendType)(uint8_t)bits(machInst, 15, 13); + uint8_t imm3 = bits(machInst, 12, 10); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rdsp = makeSP(rd); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rnsp = makeSP(rn); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + + switch (switchVal) { + case 0x0: + return new AddXEReg(machInst, rdsp, rnsp, rm, type, imm3); + case 0x1: + return new AddXERegCc(machInst, rd, rnsp, rm, type, imm3); + case 0x2: + return new SubXEReg(machInst, rdsp, rnsp, rm, type, imm3); + case 0x3: + return new SubXERegCc(machInst, rd, rnsp, rm, type, imm3); + } + } + } + case 0x2: + { + if (bits(machInst, 21) == 1) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (bits(machInst, 23, 22)) { + case 0x0: + { + if (bits(machInst, 15, 10)) + return new Unknown64(machInst); + uint8_t switchVal = bits(machInst, 30, 29); + switch (switchVal) { + case 0x0: + return new AdcXSReg(machInst, rd, rn, rm, 0, LSL); + case 0x1: + return new 
AdcXSRegCc(machInst, rd, rn, rm, 0, LSL); + case 0x2: + return new SbcXSReg(machInst, rd, rn, rm, 0, LSL); + case 0x3: + return new SbcXSRegCc(machInst, rd, rn, rm, 0, LSL); + } + } + case 0x1: + { + if ((bits(machInst, 4) == 1) || + (bits(machInst, 10) == 1) || + (bits(machInst, 29) == 0)) { + return new Unknown64(machInst); + } + ConditionCode cond = + (ConditionCode)(uint8_t)bits(machInst, 15, 12); + uint8_t flags = bits(machInst, 3, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + if (bits(machInst, 11) == 0) { + IntRegIndex rm = + (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + if (bits(machInst, 30) == 0) { + return new CcmnReg64(machInst, rn, rm, cond, flags); + } else { + return new CcmpReg64(machInst, rn, rm, cond, flags); + } + } else { + uint8_t imm5 = bits(machInst, 20, 16); + if (bits(machInst, 30) == 0) { + return new CcmnImm64(machInst, rn, imm5, cond, flags); + } else { + return new CcmpImm64(machInst, rn, imm5, cond, flags); + } + } + } + case 0x2: + { + if (bits(machInst, 29) == 1 || + bits(machInst, 11) == 1) { + return new Unknown64(machInst); + } + uint8_t switchVal = (bits(machInst, 10) << 0) | + (bits(machInst, 30) << 1); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + ConditionCode cond = + (ConditionCode)(uint8_t)bits(machInst, 15, 12); + switch (switchVal) { + case 0x0: + return new Csel64(machInst, rd, rn, rm, cond); + case 0x1: + return new Csinc64(machInst, rd, rn, rm, cond); + case 0x2: + return new Csinv64(machInst, rd, rn, rm, cond); + case 0x3: + return new Csneg64(machInst, rd, rn, rm, cond); + } + } + case 0x3: + if (bits(machInst, 30) == 0) { + if (bits(machInst, 29) != 0) + return new Unknown64(machInst); + uint8_t switchVal = bits(machInst, 15, 10); + switch (switchVal) { + case 0x2: + return new Udiv64(machInst, rd, rn, rm); + case 0x3: + return new Sdiv64(machInst, rd, rn, rm); + case 0x8: + return new Lslv64(machInst, rd, rn, rm); + case 0x9: + return new Lsrv64(machInst, rd, rn, rm); + case 0xa: + return new Asrv64(machInst, rd, rn, rm); + case 0xb: + return new Rorv64(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } else { + if (bits(machInst, 20, 16) != 0 || + bits(machInst, 29) != 0) { + return new Unknown64(machInst); + } + uint8_t switchVal = bits(machInst, 15, 10); + switch (switchVal) { + case 0x0: + return new Rbit64(machInst, rd, rn); + case 0x1: + return new Rev1664(machInst, rd, rn); + case 0x2: + if (bits(machInst, 31) == 0) + return new Rev64(machInst, rd, rn); + else + return new Rev3264(machInst, rd, rn); + case 0x3: + if (bits(machInst, 31) != 1) + return new Unknown64(machInst); + return new Rev64(machInst, rd, rn); + case 0x4: + return new Clz64(machInst, rd, rn); + case 0x5: + return new Cls64(machInst, rd, rn); + } + } + } + } + case 0x3: + { + if (bits(machInst, 30, 29) != 0x0 || + (bits(machInst, 23, 21) != 0 && bits(machInst, 31) == 0)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + IntRegIndex ra = (IntRegIndex)(uint8_t)bits(machInst, 14, 10); + IntRegIndex rm = (IntRegIndex)(uint8_t)bits(machInst, 20, 16); + switch (bits(machInst, 23, 21)) { + case 0x0: + if (bits(machInst, 15) == 0) + return new Madd64(machInst, rd, ra, rn, rm); + else + return new Msub64(machInst, rd, ra, rn, rm); + case 0x1: + if (bits(machInst, 15) == 
0) + return new Smaddl64(machInst, rd, ra, rn, rm); + else + return new Smsubl64(machInst, rd, ra, rn, rm); + case 0x2: + if (bits(machInst, 15) != 0) + return new Unknown64(machInst); + return new Smulh64(machInst, rd, rn, rm); + case 0x5: + if (bits(machInst, 15) == 0) + return new Umaddl64(machInst, rd, ra, rn, rm); + else + return new Umsubl64(machInst, rd, ra, rn, rm); + case 0x6: + if (bits(machInst, 15) != 0) + return new Unknown64(machInst); + return new Umulh64(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } + } + return new FailUnimplemented("Unhandled Case2", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeAdvSIMD(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 10) == 0) { + return decodeNeonIndexedElem(machInst); + } else if (bits(machInst, 23) == 1) { + return new Unknown64(machInst); + } else { + if (bits(machInst, 22, 19)) { + return decodeNeonShiftByImm(machInst); + } else { + return decodeNeonModImm(machInst); + } + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 10) == 1) { + return decodeNeon3Same(machInst); + } else if (bits(machInst, 11) == 0) { + return decodeNeon3Diff(machInst); + } else if (bits(machInst, 20, 17) == 0x0) { + return decodeNeon2RegMisc(machInst); + } else if (bits(machInst, 20, 17) == 0x8) { + return decodeNeonAcrossLanes(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 24) || + bits(machInst, 21) || + bits(machInst, 15)) { + return new Unknown64(machInst); + } else if (bits(machInst, 10) == 1) { + if (bits(machInst, 23, 22)) + return new Unknown64(machInst); + return decodeNeonCopy(machInst); + } else if (bits(machInst, 29) == 1) { + return decodeNeonExt(machInst); + } else if (bits(machInst, 11) == 1) { + return decodeNeonZipUzpTrn(machInst); + } else if (bits(machInst, 23, 22) == 0x0) { + return decodeNeonTblTbx(machInst); + } else { + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case3", machInst); + } +} +}}; + + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + // bit 30=0, 28:25=1111 + decodeFp(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + IntRegIndex ra = (IntRegIndex)(uint32_t)bits(machInst, 14, 10); + uint8_t switchVal = (bits(machInst, 23, 21) << 1) | + (bits(machInst, 15) << 0); + switch (switchVal) { + case 0x0: // FMADD Sd = Sa + Sn*Sm + return new FMAddS(machInst, rd, rn, rm, ra); + case 0x1: // FMSUB Sd = Sa + (-Sn)*Sm + return new FMSubS(machInst, rd, rn, rm, ra); + case 0x2: // FNMADD Sd = (-Sa) + (-Sn)*Sm + return new FNMAddS(machInst, rd, rn, rm, ra); + case 0x3: // FNMSUB Sd = (-Sa) + Sn*Sm + return new FNMSubS(machInst, rd, rn, rm, ra); + case 0x4: // FMADD Dd = Da + Dn*Dm + return new FMAddD(machInst, rd, rn, rm, ra); + case 0x5: // FMSUB Dd = Da + (-Dn)*Dm + return new FMSubD(machInst, rd, rn, rm, ra); + case 0x6: // FNMADD Dd = (-Da) + (-Dn)*Dm + return new FNMAddD(machInst, rd, rn, rm, ra); + case 0x7: // FNMSUB Dd = (-Da) + Dn*Dm + return new FNMSubD(machInst, rd, rn, rm, ra); + default: + return new Unknown64(machInst); + } + } else if (bits(machInst, 21) == 0) { + bool s = bits(machInst, 29); + if (s) + return new Unknown64(machInst); + uint8_t 
switchVal = bits(machInst, 20, 16); + uint8_t type = bits(machInst, 23, 22); + uint8_t scale = bits(machInst, 15, 10); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + if (bits(machInst, 18, 17) == 3 && scale != 0) + return new Unknown64(machInst); + // 30:24=0011110, 21=0 + switch (switchVal) { + case 0x00: + return new FailUnimplemented("fcvtns", machInst); + case 0x01: + return new FailUnimplemented("fcvtnu", machInst); + case 0x02: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // SCVTF Sd = convertFromInt(Wn/(2^fbits)) + return new FcvtSFixedFpSW(machInst, rd, rn, scale); + case 1: // SCVTF Dd = convertFromInt(Wn/(2^fbits)) + return new FcvtSFixedFpDW(machInst, rd, rn, scale); + case 4: // SCVTF Sd = convertFromInt(Xn/(2^fbits)) + return new FcvtSFixedFpSX(machInst, rd, rn, scale); + case 5: // SCVTF Dd = convertFromInt(Xn/(2^fbits)) + return new FcvtSFixedFpDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x03: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // UCVTF Sd = convertFromInt(Wn/(2^fbits)) + return new FcvtUFixedFpSW(machInst, rd, rn, scale); + case 1: // UCVTF Dd = convertFromInt(Wn/(2^fbits)) + return new FcvtUFixedFpDW(machInst, rd, rn, scale); + case 4: // UCVTF Sd = convertFromInt(Xn/(2^fbits)) + return new FcvtUFixedFpSX(machInst, rd, rn, scale); + case 5: // UCVTF Dd = convertFromInt(Xn/(2^fbits)) + return new FcvtUFixedFpDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x04: + return new FailUnimplemented("fcvtas", machInst); + case 0x05: + return new FailUnimplemented("fcvtau", machInst); + case 0x08: + return new FailUnimplemented("fcvtps", machInst); + case 0x09: + return new FailUnimplemented("fcvtpu", machInst); + case 0x0e: + return new FailUnimplemented("fmov elem. 
to 64", machInst); + case 0x0f: + return new FailUnimplemented("fmov 64 bit", machInst); + case 0x10: + return new FailUnimplemented("fcvtms", machInst); + case 0x11: + return new FailUnimplemented("fcvtmu", machInst); + case 0x18: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // FCVTZS Wd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpSFixedSW(machInst, rd, rn, scale); + case 1: // FCVTZS Wd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpSFixedDW(machInst, rd, rn, scale); + case 4: // FCVTZS Xd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpSFixedSX(machInst, rd, rn, scale); + case 5: // FCVTZS Xd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpSFixedDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + case 0x19: + switch ( (bits(machInst, 31) << 2) | type ) { + case 0: // FCVTZU Wd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpUFixedSW(machInst, rd, rn, scale); + case 1: // FCVTZU Wd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpUFixedDW(machInst, rd, rn, scale); + case 4: // FCVTZU Xd = convertToIntExactTowardZero(Sn*(2^fbits)) + return new FcvtFpUFixedSX(machInst, rd, rn, scale); + case 5: // FCVTZU Xd = convertToIntExactTowardZero(Dn*(2^fbits)) + return new FcvtFpUFixedDX(machInst, rd, rn, scale); + default: + return new Unknown64(machInst); + } + } + } else { + // 30=0, 28:24=11110, 21=1 + uint8_t type = bits(machInst, 23, 22); + uint8_t imm8 = bits(machInst, 20, 13); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + switch (bits(machInst, 11, 10)) { + case 0x0: + if (bits(machInst, 12) == 1) { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 9, 5)) { + return new Unknown64(machInst); + } + // 31:29=000, 28:24=11110, 21=1, 12:10=100 + if (type == 0) { + // FMOV S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,5) + // :imm8<5:0>:Zeros(19) + uint32_t imm = vfp_modified_imm(imm8, false); + return new FmovImmS(machInst, rd, imm); + } else if (type == 1) { + // FMOV D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>,8) + // :imm8<5:0>:Zeros(48) + uint64_t imm = vfp_modified_imm(imm8, true); + return new FmovImmD(machInst, rd, imm); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 13) == 1) { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 15, 14) || + bits(machInst, 23) || + bits(machInst, 2, 0)) { + return new Unknown64(machInst); + } + uint8_t switchVal = (bits(machInst, 4, 3) << 0) | + (bits(machInst, 22) << 2); + IntRegIndex rm = (IntRegIndex)(uint32_t) + bits(machInst, 20, 16); + // 28:23=000111100, 21=1, 15:10=001000, 2:0=000 + switch (switchVal) { + case 0x0: + // FCMP flags = compareQuiet(Sn,Sm) + return new FCmpRegS(machInst, rn, rm); + case 0x1: + // FCMP flags = compareQuiet(Sn,0.0) + return new FCmpImmS(machInst, rn, 0); + case 0x2: + // FCMPE flags = compareSignaling(Sn,Sm) + return new FCmpERegS(machInst, rn, rm); + case 0x3: + // FCMPE flags = compareSignaling(Sn,0.0) + return new FCmpEImmS(machInst, rn, 0); + case 0x4: + // FCMP flags = compareQuiet(Dn,Dm) + return new FCmpRegD(machInst, rn, rm); + case 0x5: + // FCMP flags = compareQuiet(Dn,0.0) + return new FCmpImmD(machInst, rn, 0); + case 0x6: + // FCMPE flags = compareSignaling(Dn,Dm) + return new FCmpERegD(machInst, rn, rm); + case 0x7: + // FCMPE flags = compareSignaling(Dn,0.0) + return new FCmpEImmD(machInst, rn, 0); + default: + return new 
Unknown64(machInst); + } + } else if (bits(machInst, 14) == 1) { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t opcode = bits(machInst, 20, 15); + // Bits 31:24=00011110, 21=1, 14:10=10000 + switch (opcode) { + case 0x0: + if (type == 0) + // FMOV Sd = Sn + return new FmovRegS(machInst, rd, rn); + else if (type == 1) + // FMOV Dd = Dn + return new FmovRegD(machInst, rd, rn); + break; + case 0x1: + if (type == 0) + // FABS Sd = abs(Sn) + return new FAbsS(machInst, rd, rn); + else if (type == 1) + // FABS Dd = abs(Dn) + return new FAbsD(machInst, rd, rn); + break; + case 0x2: + if (type == 0) + // FNEG Sd = -Sn + return new FNegS(machInst, rd, rn); + else if (type == 1) + // FNEG Dd = -Dn + return new FNegD(machInst, rd, rn); + break; + case 0x3: + if (type == 0) + // FSQRT Sd = sqrt(Sn) + return new FSqrtS(machInst, rd, rn); + else if (type == 1) + // FSQRT Dd = sqrt(Dn) + return new FSqrtD(machInst, rd, rn); + break; + case 0x4: + if (type == 1) + // FCVT Sd = convertFormat(Dn) + return new FcvtFpDFpS(machInst, rd, rn); + else if (type == 3) + // FCVT Sd = convertFormat(Hn) + return new FcvtFpHFpS(machInst, rd, rn); + break; + case 0x5: + if (type == 0) + // FCVT Dd = convertFormat(Sn) + return new FCvtFpSFpD(machInst, rd, rn); + else if (type == 3) + // FCVT Dd = convertFormat(Hn) + return new FcvtFpHFpD(machInst, rd, rn); + break; + case 0x7: + if (type == 0) + // FCVT Hd = convertFormat(Sn) + return new FcvtFpSFpH(machInst, rd, rn); + else if (type == 1) + // FCVT Hd = convertFormat(Dn) + return new FcvtFpDFpH(machInst, rd, rn); + break; + case 0x8: + if (type == 0) // FRINTN Sd = roundToIntegralTiesToEven(Sn) + return new FRIntNS(machInst, rd, rn); + else if (type == 1) // FRINTN Dd = roundToIntegralTiesToEven(Dn) + return new FRIntND(machInst, rd, rn); + break; + case 0x9: + if (type == 0) // FRINTP Sd = roundToIntegralTowardPlusInf(Sn) + return new FRIntPS(machInst, rd, rn); + else if (type == 1) // FRINTP Dd = roundToIntegralTowardPlusInf(Dn) + return new FRIntPD(machInst, rd, rn); + break; + case 0xa: + if (type == 0) // FRINTM Sd = roundToIntegralTowardMinusInf(Sn) + return new FRIntMS(machInst, rd, rn); + else if (type == 1) // FRINTM Dd = roundToIntegralTowardMinusInf(Dn) + return new FRIntMD(machInst, rd, rn); + break; + case 0xb: + if (type == 0) // FRINTZ Sd = roundToIntegralTowardZero(Sn) + return new FRIntZS(machInst, rd, rn); + else if (type == 1) // FRINTZ Dd = roundToIntegralTowardZero(Dn) + return new FRIntZD(machInst, rd, rn); + break; + case 0xc: + if (type == 0) // FRINTA Sd = roundToIntegralTiesToAway(Sn) + return new FRIntAS(machInst, rd, rn); + else if (type == 1) // FRINTA Dd = roundToIntegralTiesToAway(Dn) + return new FRIntAD(machInst, rd, rn); + break; + case 0xe: + if (type == 0) // FRINTX Sd = roundToIntegralExact(Sn) + return new FRIntXS(machInst, rd, rn); + else if (type == 1) // FRINTX Dd = roundToIntegralExact(Dn) + return new FRIntXD(machInst, rd, rn); + break; + case 0xf: + if (type == 0) // FRINTI Sd = roundToIntegral(Sn) + return new FRIntIS(machInst, rd, rn); + else if (type == 1) // FRINTI Dd = roundToIntegral(Dn) + return new FRIntID(machInst, rd, rn); + break; + default: + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } else if (bits(machInst, 15) == 1) { + return new Unknown64(machInst); + } else { + if (bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t rmode = bits(machInst, 20, 19); + uint8_t switchVal1 = bits(machInst, 18, 16); + uint8_t switchVal2 = (type << 
1) | bits(machInst, 31); + // 30:24=0011110, 21=1, 15:10=000000 + switch (switchVal1) { + case 0x0: + switch ((switchVal2 << 2) | rmode) { + case 0x0: //FCVTNS Wd = convertToIntExactTiesToEven(Sn) + return new FcvtFpSIntWSN(machInst, rd, rn); + case 0x1: //FCVTPS Wd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpSIntWSP(machInst, rd, rn); + case 0x2: //FCVTMS Wd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpSIntWSM(machInst, rd, rn); + case 0x3: //FCVTZS Wd = convertToIntExactTowardZero(Sn) + return new FcvtFpSIntWSZ(machInst, rd, rn); + case 0x4: //FCVTNS Xd = convertToIntExactTiesToEven(Sn) + return new FcvtFpSIntXSN(machInst, rd, rn); + case 0x5: //FCVTPS Xd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpSIntXSP(machInst, rd, rn); + case 0x6: //FCVTMS Xd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpSIntXSM(machInst, rd, rn); + case 0x7: //FCVTZS Xd = convertToIntExactTowardZero(Sn) + return new FcvtFpSIntXSZ(machInst, rd, rn); + case 0x8: //FCVTNS Wd = convertToIntExactTiesToEven(Dn) + return new FcvtFpSIntWDN(machInst, rd, rn); + case 0x9: //FCVTPS Wd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpSIntWDP(machInst, rd, rn); + case 0xA: //FCVTMS Wd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpSIntWDM(machInst, rd, rn); + case 0xB: //FCVTZS Wd = convertToIntExactTowardZero(Dn) + return new FcvtFpSIntWDZ(machInst, rd, rn); + case 0xC: //FCVTNS Xd = convertToIntExactTiesToEven(Dn) + return new FcvtFpSIntXDN(machInst, rd, rn); + case 0xD: //FCVTPS Xd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpSIntXDP(machInst, rd, rn); + case 0xE: //FCVTMS Xd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpSIntXDM(machInst, rd, rn); + case 0xF: //FCVTZS Xd = convertToIntExactTowardZero(Dn) + return new FcvtFpSIntXDZ(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x1: + switch ((switchVal2 << 2) | rmode) { + case 0x0: //FCVTNU Wd = convertToIntExactTiesToEven(Sn) + return new FcvtFpUIntWSN(machInst, rd, rn); + case 0x1: //FCVTPU Wd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpUIntWSP(machInst, rd, rn); + case 0x2: //FCVTMU Wd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpUIntWSM(machInst, rd, rn); + case 0x3: //FCVTZU Wd = convertToIntExactTowardZero(Sn) + return new FcvtFpUIntWSZ(machInst, rd, rn); + case 0x4: //FCVTNU Xd = convertToIntExactTiesToEven(Sn) + return new FcvtFpUIntXSN(machInst, rd, rn); + case 0x5: //FCVTPU Xd = convertToIntExactTowardPlusInf(Sn) + return new FcvtFpUIntXSP(machInst, rd, rn); + case 0x6: //FCVTMU Xd = convertToIntExactTowardMinusInf(Sn) + return new FcvtFpUIntXSM(machInst, rd, rn); + case 0x7: //FCVTZU Xd = convertToIntExactTowardZero(Sn) + return new FcvtFpUIntXSZ(machInst, rd, rn); + case 0x8: //FCVTNU Wd = convertToIntExactTiesToEven(Dn) + return new FcvtFpUIntWDN(machInst, rd, rn); + case 0x9: //FCVTPU Wd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpUIntWDP(machInst, rd, rn); + case 0xA: //FCVTMU Wd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpUIntWDM(machInst, rd, rn); + case 0xB: //FCVTZU Wd = convertToIntExactTowardZero(Dn) + return new FcvtFpUIntWDZ(machInst, rd, rn); + case 0xC: //FCVTNU Xd = convertToIntExactTiesToEven(Dn) + return new FcvtFpUIntXDN(machInst, rd, rn); + case 0xD: //FCVTPU Xd = convertToIntExactTowardPlusInf(Dn) + return new FcvtFpUIntXDP(machInst, rd, rn); + case 0xE: //FCVTMU Xd = convertToIntExactTowardMinusInf(Dn) + return new FcvtFpUIntXDM(machInst, rd, rn); + case 0xF: //FCVTZU Xd = 
convertToIntExactTowardZero(Dn) + return new FcvtFpUIntXDZ(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x2: + if (rmode != 0) + return new Unknown64(machInst); + switch (switchVal2) { + case 0: // SCVTF Sd = convertFromInt(Wn) + return new FcvtWSIntFpS(machInst, rd, rn); + case 1: // SCVTF Sd = convertFromInt(Xn) + return new FcvtXSIntFpS(machInst, rd, rn); + case 2: // SCVTF Dd = convertFromInt(Wn) + return new FcvtWSIntFpD(machInst, rd, rn); + case 3: // SCVTF Dd = convertFromInt(Xn) + return new FcvtXSIntFpD(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x3: + switch (switchVal2) { + case 0: // UCVTF Sd = convertFromInt(Wn) + return new FcvtWUIntFpS(machInst, rd, rn); + case 1: // UCVTF Sd = convertFromInt(Xn) + return new FcvtXUIntFpS(machInst, rd, rn); + case 2: // UCVTF Dd = convertFromInt(Wn) + return new FcvtWUIntFpD(machInst, rd, rn); + case 3: // UCVTF Dd = convertFromInt(Xn) + return new FcvtXUIntFpD(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x4: + if (rmode != 0) + return new Unknown64(machInst); + switch (switchVal2) { + case 0: // FCVTAS Wd = convertToIntExactTiesToAway(Sn) + return new FcvtFpSIntWSA(machInst, rd, rn); + case 1: // FCVTAS Xd = convertToIntExactTiesToAway(Sn) + return new FcvtFpSIntXSA(machInst, rd, rn); + case 2: // FCVTAS Wd = convertToIntExactTiesToAway(Dn) + return new FcvtFpSIntWDA(machInst, rd, rn); + case 3: // FCVTAS Xd = convertToIntExactTiesToAway(Dn) + return new FcvtFpSIntXDA(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x5: + switch (switchVal2) { + case 0: // FCVTAU Wd = convertToIntExactTiesToAway(Sn) + return new FcvtFpUIntWSA(machInst, rd, rn); + case 1: // FCVTAU Xd = convertToIntExactTiesToAway(Sn) + return new FcvtFpUIntXSA(machInst, rd, rn); + case 2: // FCVTAU Wd = convertToIntExactTiesToAway(Dn) + return new FcvtFpUIntWDA(machInst, rd, rn); + case 3: // FCVTAU Xd = convertToIntExactTiesToAway(Dn) + return new FcvtFpUIntXDA(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + case 0x06: + switch (switchVal2) { + case 0: // FMOV Wd = Sn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovRegCoreW(machInst, rd, rn); + case 3: // FMOV Xd = Dn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovRegCoreX(machInst, rd, rn); + case 5: // FMOV Xd = Vn<127:64> + if (rmode != 1) + return new Unknown64(machInst); + return new FmovURegCoreX(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + break; + case 0x07: + switch (switchVal2) { + case 0: // FMOV Sd = Wn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovCoreRegW(machInst, rd, rn); + case 3: // FMOV Dd = Xn + if (rmode != 0) + return new Unknown64(machInst); + return new FmovCoreRegX(machInst, rd, rn); + case 5: // FMOV Vd<127:64> = Xn + if (rmode != 1) + return new Unknown64(machInst); + return new FmovUCoreRegX(machInst, rd, rn); + default: + return new Unknown64(machInst); + } + break; + default: // Warning!
missing cases in the switch statement above still need to be added + return new Unknown64(machInst); + } + } + case 0x1: + { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 23)) { + return new Unknown64(machInst); + } + IntRegIndex rm = (IntRegIndex)(uint32_t) bits(machInst, 20, 16); + IntRegIndex rn = (IntRegIndex)(uint32_t) bits(machInst, 9, 5); + uint8_t imm = bits(machInst, 3, 0); + ConditionCode cond = + (ConditionCode)(uint8_t)(bits(machInst, 15, 12)); + uint8_t switchVal = (bits(machInst, 4) << 0) | + (bits(machInst, 22) << 1); + // 31:23=000111100, 21=1, 11:10=01 + switch (switchVal) { + case 0x0: + // FCCMP flags = if cond then compareQuiet(Sn,Sm) else #nzcv + return new FCCmpRegS(machInst, rn, rm, cond, imm); + case 0x1: + // FCCMPE flags = if cond then compareSignaling(Sn,Sm) + // else #nzcv + return new FCCmpERegS(machInst, rn, rm, cond, imm); + case 0x2: + // FCCMP flags = if cond then compareQuiet(Dn,Dm) else #nzcv + return new FCCmpRegD(machInst, rn, rm, cond, imm); + case 0x3: + // FCCMPE flags = if cond then compareSignaling(Dn,Dm) + // else #nzcv + return new FCCmpERegD(machInst, rn, rm, cond, imm); + default: + return new Unknown64(machInst); + } + } + case 0x2: + { + if (bits(machInst, 31) || + bits(machInst, 29) || + bits(machInst, 23)) { + return new Unknown64(machInst); + } + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + uint8_t switchVal = (bits(machInst, 15, 12) << 0) | + (bits(machInst, 22) << 4); + switch (switchVal) { + case 0x00: // FMUL Sd = Sn * Sm + return new FMulS(machInst, rd, rn, rm); + case 0x10: // FMUL Dd = Dn * Dm + return new FMulD(machInst, rd, rn, rm); + case 0x01: // FDIV Sd = Sn / Sm + return new FDivS(machInst, rd, rn, rm); + case 0x11: // FDIV Dd = Dn / Dm + return new FDivD(machInst, rd, rn, rm); + case 0x02: // FADD Sd = Sn + Sm + return new FAddS(machInst, rd, rn, rm); + case 0x12: // FADD Dd = Dn + Dm + return new FAddD(machInst, rd, rn, rm); + case 0x03: // FSUB Sd = Sn - Sm + return new FSubS(machInst, rd, rn, rm); + case 0x13: // FSUB Dd = Dn - Dm + return new FSubD(machInst, rd, rn, rm); + case 0x04: // FMAX Sd = max(Sn, Sm) + return new FMaxS(machInst, rd, rn, rm); + case 0x14: // FMAX Dd = max(Dn, Dm) + return new FMaxD(machInst, rd, rn, rm); + case 0x05: // FMIN Sd = min(Sn, Sm) + return new FMinS(machInst, rd, rn, rm); + case 0x15: // FMIN Dd = min(Dn, Dm) + return new FMinD(machInst, rd, rn, rm); + case 0x06: // FMAXNM Sd = maxNum(Sn, Sm) + return new FMaxNMS(machInst, rd, rn, rm); + case 0x16: // FMAXNM Dd = maxNum(Dn, Dm) + return new FMaxNMD(machInst, rd, rn, rm); + case 0x07: // FMINNM Sd = minNum(Sn, Sm) + return new FMinNMS(machInst, rd, rn, rm); + case 0x17: // FMINNM Dd = minNum(Dn, Dm) + return new FMinNMD(machInst, rd, rn, rm); + case 0x08: // FNMUL Sd = -(Sn * Sm) + return new FNMulS(machInst, rd, rn, rm); + case 0x18: // FNMUL Dd = -(Dn * Dm) + return new FNMulD(machInst, rd, rn, rm); + default: + return new Unknown64(machInst); + } + } + case 0x3: + { + if (bits(machInst, 31) || bits(machInst, 29)) + return new Unknown64(machInst); + uint8_t type = bits(machInst, 23, 22); + IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex)(uint32_t)bits(machInst, 20, 16); + ConditionCode cond = + (ConditionCode)(uint8_t)(bits(machInst, 15,
12)); + if (type == 0) // FCSEL Sd = if cond then Sn else Sm + return new FCSelS(machInst, rd, rn, rm, cond); + else if (type == 1) // FCSEL Dd = if cond then Dn else Dm + return new FCSelD(machInst, rd, rn, rm, cond); + else + return new Unknown64(machInst); + } + } + } + return new FailUnimplemented("Unhandled Case4", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeAdvSIMDScalar(ExtMachInst machInst) + { + if (bits(machInst, 24) == 1) { + if (bits(machInst, 10) == 0) { + return decodeNeonScIndexedElem(machInst); + } else if (bits(machInst, 23) == 0) { + return decodeNeonScShiftByImm(machInst); + } + } else if (bits(machInst, 21) == 1) { + if (bits(machInst, 10) == 1) { + return decodeNeonSc3Same(machInst); + } else if (bits(machInst, 11) == 0) { + return decodeNeonSc3Diff(machInst); + } else if (bits(machInst, 20, 17) == 0x0) { + return decodeNeonSc2RegMisc(machInst); + } else if (bits(machInst, 20, 17) == 0x8) { + return decodeNeonScPwise(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 23, 22) == 0 && + bits(machInst, 15) == 0 && + bits(machInst, 10) == 1) { + return decodeNeonScCopy(machInst); + } else { + return new Unknown64(machInst); + } + return new FailUnimplemented("Unhandled Case6", machInst); + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeFpAdvSIMD(ExtMachInst machInst) + { + + if (bits(machInst, 28) == 0) { + if (bits(machInst, 31) == 0) { + return decodeAdvSIMD(machInst); + } else { + return new Unknown64(machInst); + } + } else if (bits(machInst, 30) == 0) { + return decodeFp(machInst); + } else if (bits(machInst, 31) == 0) { + return decodeAdvSIMDScalar(machInst); + } else { + return new Unknown64(machInst); + } + } +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeGem5Ops(ExtMachInst machInst) + { + const uint32_t m5func = bits(machInst, 23, 16); + switch (m5func) { + case 0x00: return new Arm(machInst); + case 0x01: return new Quiesce(machInst); + case 0x02: return new QuiesceNs64(machInst); + case 0x03: return new QuiesceCycles64(machInst); + case 0x04: return new QuiesceTime64(machInst); + case 0x07: return new Rpns64(machInst); + case 0x09: return new WakeCPU64(machInst); + case 0x10: return new Deprecated_ivlb(machInst); + case 0x11: return new Deprecated_ivle(machInst); + case 0x20: return new Deprecated_exit (machInst); + case 0x21: return new M5exit64(machInst); + case 0x31: return new Loadsymbol(machInst); + case 0x30: return new Initparam64(machInst); + case 0x40: return new Resetstats64(machInst); + case 0x41: return new Dumpstats64(machInst); + case 0x42: return new Dumpresetstats64(machInst); + case 0x43: return new M5checkpoint64(machInst); + case 0x4F: return new M5writefile64(machInst); + case 0x50: return new M5readfile64(machInst); + case 0x51: return new M5break(machInst); + case 0x52: return new M5switchcpu(machInst); + case 0x53: return new M5addsymbol64(machInst); + case 0x54: return new M5panic(machInst); + case 0x5a: return new M5workbegin64(machInst); + case 0x5b: return new M5workend64(machInst); + default: return new Unknown64(machInst); + } + } +} +}}; + +def format Aarch64() {{ + decode_block = ''' + { + using namespace Aarch64; + if (bits(machInst, 27) == 0x0) { + if (bits(machInst, 28) == 0x0) + return new Unknown64(machInst); + else if (bits(machInst, 26) == 0) + // bit 28:26=100 + return decodeDataProcImm(machInst); + else + // bit 28:26=101 + return decodeBranchExcSys(machInst); + } else if 
(bits(machInst, 25) == 0) { + // bit 27=1, 25=0 + return decodeLoadsStores(machInst); + } else if (bits(machInst, 26) == 0) { + // bit 27:25=101 + return decodeDataProcReg(machInst); + } else if (bits(machInst, 24) == 1 && + bits(machInst, 31, 28) == 0xF) { + return decodeGem5Ops(machInst); + } else { + // bit 27:25=111 + return decodeFpAdvSIMD(machInst); + } + } + ''' +}}; diff --git a/src/arch/arm/isa/formats/branch.isa b/src/arch/arm/isa/formats/branch.isa index f1b17ec90..513506d31 100644 --- a/src/arch/arm/isa/formats/branch.isa +++ b/src/arch/arm/isa/formats/branch.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -101,7 +101,7 @@ def format Thumb16CondBranchAndSvc() {{ return new B(machInst, sext<9>(bits(machInst, 7, 0) << 1), (ConditionCode)(uint32_t)bits(machInst, 11, 8)); } else if (bits(machInst, 8)) { - return new Svc(machInst); + return new Svc(machInst, bits(machInst, 7, 0)); } else { // This space will not be allocated in the future. return new Unknown(machInst); @@ -127,7 +127,7 @@ def format Thumb32BranchesAndMiscCtrl() {{ // Permanently undefined. return new Unknown(machInst); } else { - return new WarnUnimplemented("smc", machInst); + return new Smc(machInst); } } else if ((op & 0x38) != 0x38) { const uint32_t s = bits(machInst, 26); @@ -141,20 +141,26 @@ def format Thumb32BranchesAndMiscCtrl() {{ return new B(machInst, imm, (ConditionCode)(uint32_t)bits(machInst, 25, 22)); } else { + // HIGH: 12-11=10, LOW: 15-14=00, 12=0 switch (op) { case 0x38: - { - const IntRegIndex rn = - (IntRegIndex)(uint32_t)bits(machInst, 19, 16); - const uint8_t byteMask = bits(machInst, 11, 8); - return new MsrCpsrReg(machInst, rn, byteMask); - } case 0x39: { const IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 19, 16); const uint8_t byteMask = bits(machInst, 11, 8); - return new MsrSpsrReg(machInst, rn, byteMask); + const bool r = bits(machInst, 20); + if (bits(machInst, 5)) { + const uint8_t sysM = (bits(machInst, 4) << 4) | + byteMask; + return new MsrBankedReg(machInst, rn, sysM, r); + } else { + if (r) { + return new MsrSpsrReg(machInst, rn, byteMask); + } else { + return new MsrCpsrReg(machInst, rn, byteMask); + } + } } case 0x3a: { @@ -196,11 +202,11 @@ def format Thumb32BranchesAndMiscCtrl() {{ case 0x2: return new Clrex(machInst); case 0x4: - return new Dsb(machInst); + return new Dsb(machInst, 0); case 0x5: - return new Dmb(machInst); + return new Dmb(machInst, 0); case 0x6: - return new Isb(machInst); + return new Isb(machInst, 0); default: break; } @@ -208,28 +214,44 @@ def format Thumb32BranchesAndMiscCtrl() {{ } case 0x3c: { - // On systems that don't support bxj, bxj == bx - return new BxReg(machInst, + return new BxjReg(machInst, (IntRegIndex)(uint32_t)bits(machInst, 19, 16), COND_UC); } case 0x3d: { const uint32_t imm32 = bits(machInst, 7, 0); - return new SubsImmPclr(machInst, INTREG_PC, INTREG_LR, - imm32, false); + if (imm32 == 0) { + return new Eret(machInst); + } else { + return new SubsImmPclr(machInst, INTREG_PC, + INTREG_LR, imm32, false); + } } case 0x3e: - { - const IntRegIndex rd = - (IntRegIndex)(uint32_t)bits(machInst, 11, 8); - return new MrsCpsr(machInst, rd); - } case 0x3f: { + const IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 11, 8); - return new MrsSpsr(machInst, rd); + const bool r = bits(machInst, 20); + if (bits(machInst, 5)) { + const uint8_t sysM = (bits(machInst, 
4) << 4) | + bits(machInst, 11, 8); + return new MrsBankedReg(machInst, rd, sysM, r); + } else { + if (r) { + return new MrsSpsr(machInst, rd); + } else { + return new MrsCpsr(machInst, rd); + } + } + } + case 0xfe: + { + uint32_t imm16 = (bits(machInst, 19, 16) << 12) | + (bits(machInst, 11, 0) << 0); + return new Hvc(machInst, imm16); } } break; diff --git a/src/arch/arm/isa/formats/formats.isa b/src/arch/arm/isa/formats/formats.isa index 90144c101..44e9c5b5e 100644 --- a/src/arch/arm/isa/formats/formats.isa +++ b/src/arch/arm/isa/formats/formats.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -44,6 +44,12 @@ //Include the basic format ##include "basic.isa" +//Include support for decoding AArch64 instructions +##include "aarch64.isa" + +//Include support for decoding AArch64 NEON instructions +##include "neon64.isa" + //Include support for predicated instructions ##include "pred.isa" diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa index 6d779e541..ccd4589a3 100644 --- a/src/arch/arm/isa/formats/fp.isa +++ b/src/arch/arm/isa/formats/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -151,8 +151,7 @@ let {{ if (singleAll) { size = bits(machInst, 7, 6); bool t = bits(machInst, 5); - unsigned eBytes = (1 << size); - align = (eBytes - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; if (width == 1) { regs = t ? 2 : 1; inc = 1; @@ -164,7 +163,7 @@ let {{ case 1: case 2: if (bits(machInst, 4)) - align = width * eBytes - 1; + align = size + width - 1; break; case 3: break; @@ -173,20 +172,19 @@ let {{ if (bits(machInst, 4) == 0) return new Unknown(machInst); size = 2; - align = 0xf; + align = 0x4; } else if (size == 2) { if (bits(machInst, 4)) - align = 7; + align = 0x3; } else { if (bits(machInst, 4)) - align = 4 * eBytes - 1; + align = size + 2; } break; } } else { size = bits(machInst, 11, 10); - unsigned eBytes = (1 << size); - align = (eBytes - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; regs = width; unsigned indexAlign = bits(machInst, 7, 4); // If width is 1, inc is always 1. That's overridden later. @@ -219,13 +217,13 @@ let {{ break; case 2: if (bits(indexAlign, 1, 0)) - align = 3; + align = 2; break; } break; case 2: if (bits(indexAlign, 0)) - align = (2 * eBytes) - 1; + align = size + 1; break; case 3: break; @@ -234,11 +232,11 @@ let {{ case 0: case 1: if (bits(indexAlign, 0)) - align = (4 * eBytes) - 1; + align = size + 2; break; case 2: if (bits(indexAlign, 0)) - align = (4 << bits(indexAlign, 1, 0)) - 1; + align = bits(indexAlign, 1, 0) + 2; break; } break; @@ -252,9 +250,9 @@ let {{ align = bits(machInst, 5, 4); if (align == 0) { // @align wasn't specified, so alignment can be turned off. 
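
Throughout this fp.isa hunk the NEON load/store `align` value changes encoding: the old code stored a byte mask such as `(eBytes - 1)` or `(4 << align) - 1`, while the new code stores the log2 of the required alignment (`size`, `align + 2`, and so on), with `TLB::AllowUnaligned` still OR'd in where unaligned access is legal, so alignment checks elsewhere can recover the byte count as `1 << align`. A minimal standalone sketch of the equivalence between the two encodings (variable names here are illustrative, not from the patch):

    #include <cassert>

    int main()
    {
        for (unsigned size = 0; size < 4; ++size) {
            unsigned eBytes = 1u << size;        // element size in bytes
            unsigned maskEncoding = eBytes - 1;  // old style: bytes - 1
            unsigned log2Encoding = size;        // new style: log2(bytes)
            // Consumers of the new encoding can rebuild the old mask:
            assert(maskEncoding == (1u << log2Encoding) - 1);
        }
        return 0;
    }
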
- align = ((1 << size) - 1) | TLB::AllowUnaligned; + align = size | TLB::AllowUnaligned; } else { - align = ((4 << align) - 1); + align = align + 2; } switch (width) { case 1: @@ -588,6 +586,23 @@ let {{ } } case 0xc: + if (b) { + if (!u) { + if (bits(c, 1) == 0) { + if (q) { + return new NVfmaQFp(machInst, vd, vn, vm); + } else { + return new NVfmaDFp(machInst, vd, vn, vm); + } + } else { + if (q) { + return new NVfmsQFp(machInst, vd, vn, vm); + } else { + return new NVfmsDFp(machInst, vd, vn, vm); + } + } + } + } return new Unknown(machInst); case 0xd: if (b) { @@ -1827,7 +1842,7 @@ let {{ break; case 0x1: { - if (offset == 0 || vd + offset/2 > NumFloatArchRegs) { + if (offset == 0 || vd + offset/2 > NumFloatV7ArchRegs) { break; } switch (bits(opcode, 1, 0)) { @@ -1951,8 +1966,9 @@ let {{ } else if (a == 0x7) { const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - uint32_t specReg = bits(machInst, 19, 16); - switch (specReg) { + uint32_t reg = bits(machInst, 19, 16); + uint32_t specReg; + switch (reg) { case 0: specReg = MISCREG_FPSID; break; @@ -1974,7 +1990,9 @@ let {{ if (specReg == MISCREG_FPSCR) { return new VmsrFpscr(machInst, (IntRegIndex)specReg, rt); } else { - return new Vmsr(machInst, (IntRegIndex)specReg, rt); + uint32_t iss = mcrMrcIssBuild(0, bits(machInst, 3, 0), rt, + reg, a, bits(machInst, 7, 5)); + return new Vmsr(machInst, (IntRegIndex)specReg, rt, iss); } } } else if (l == 0 && c == 1) { @@ -2041,8 +2059,9 @@ let {{ } else if (a == 7) { const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - uint32_t specReg = bits(machInst, 19, 16); - switch (specReg) { + uint32_t reg = bits(machInst, 19, 16); + uint32_t specReg; + switch (reg) { case 0: specReg = MISCREG_FPSID; break; @@ -2070,7 +2089,9 @@ let {{ } else if (specReg == MISCREG_FPSCR) { return new VmrsFpscr(machInst, rt, (IntRegIndex)specReg); } else { - return new Vmrs(machInst, rt, (IntRegIndex)specReg); + uint32_t iss = mcrMrcIssBuild(l, bits(machInst, 3, 0), rt, + reg, a, bits(machInst, 7, 5)); + return new Vmrs(machInst, rt, (IntRegIndex)specReg, iss); } } } else { @@ -2235,6 +2256,44 @@ let {{ } } break; + case 0x9: + if ((opc3 & 0x1) == 0) { + if (single) { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, true); + } + } else { + if (single) { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, true); + } + } + break; + case 0xa: + if ((opc3 & 0x1) == 0) { + if (single) { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, true); + } + } else { + if (single) { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, false); + } else { + return decodeVfpRegRegRegOp( + machInst, vd, vn, vm, true); + } + } + break; case 0xb: if ((opc3 & 0x1) == 0) { const uint32_t baseImm = diff --git a/src/arch/arm/isa/formats/mem.isa b/src/arch/arm/isa/formats/mem.isa index f7830eff3..abac27021 100644 --- a/src/arch/arm/isa/formats/mem.isa +++ b/src/arch/arm/isa/formats/mem.isa @@ -282,7 +282,7 @@ def format Thumb32SrsRfe() {{ } } else { const uint32_t mode = bits(machInst, 4, 0); - if (badMode((OperatingMode)mode)) + if (badMode32((OperatingMode)mode)) return new Unknown(machInst); if (!add && !wb) { return new %(srs)s(machInst, mode, diff --git a/src/arch/arm/isa/formats/misc.isa b/src/arch/arm/isa/formats/misc.isa index 00a37d17b..647f9846d 100644 --- 
a/src/arch/arm/isa/formats/misc.isa +++ b/src/arch/arm/isa/formats/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -36,19 +36,42 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Authors: Gabe Black +// Giacomo Gabrielli + +def format ArmERet() {{ + decode_block = "return new Eret(machInst);" +}}; def format Svc() {{ - decode_block = "return new Svc(machInst);" + decode_block = "return new Svc(machInst, bits(machInst, 23, 0));" +}}; + +def format ArmSmcHyp() {{ + decode_block = ''' + { + if (bits(machInst, 21)) + { + return new Smc(machInst); + } else { + uint32_t imm16 = (bits(machInst, 19, 8) << 4) | + (bits(machInst, 3, 0) << 0); + return new Hvc(machInst, imm16); + } + } + ''' }}; def format ArmMsrMrs() {{ decode_block = ''' { const uint8_t byteMask = bits(machInst, 19, 16); + const uint8_t sysM = byteMask | (bits(machInst, 8) << 4); const IntRegIndex rn = (IntRegIndex)(uint32_t)bits(machInst, 3, 0); const IntRegIndex rd = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); const uint32_t opcode = bits(machInst, 24, 21); const bool useImm = bits(machInst, 25); + const bool r = bits(machInst, 22); + const bool isBanked = bits(machInst, 9); const uint32_t unrotated = bits(machInst, 7, 0); const uint32_t rotation = (bits(machInst, 11, 8) << 1); @@ -56,20 +79,36 @@ def format ArmMsrMrs() {{ switch (opcode) { case 0x8: - return new MrsCpsr(machInst, rd); + if (isBanked) { + return new MrsBankedReg(machInst, rd, sysM, r!=0); + } else { + return new MrsCpsr(machInst, rd); + } case 0x9: if (useImm) { return new MsrCpsrImm(machInst, imm, byteMask); } else { - return new MsrCpsrReg(machInst, rn, byteMask); + if (isBanked) { + return new MsrBankedReg(machInst, rn, sysM, r!=0); + } else { + return new MsrCpsrReg(machInst, rn, byteMask); + } } case 0xa: - return new MrsSpsr(machInst, rd); + if (isBanked) { + return new MrsBankedReg(machInst, rd, sysM, r!=0); + } else { + return new MrsSpsr(machInst, rd); + } case 0xb: if (useImm) { return new MsrSpsrImm(machInst, imm, byteMask); } else { - return new MsrSpsrReg(machInst, rn, byteMask); + if (isBanked) { + return new MsrBankedReg(machInst, rn, sysM, r!=0); + } else { + return new MsrSpsrReg(machInst, rn, byteMask); + } } default: return new Unknown(machInst); @@ -99,16 +138,17 @@ let {{ switch (miscReg) { case MISCREG_NOP: return new NopInst(machInst); - case NUM_MISCREGS: + case MISCREG_CP14_UNIMPL: return new FailUnimplemented( csprintf("miscreg crn:%d opc1:%d crm:%d opc2:%d %s unknown", crn, opc1, crm, opc2, isRead ? 
"read" : "write").c_str(), machInst); default: + uint32_t iss = mcrMrcIssBuild(isRead, crm, rt, crn, opc1, opc2); if (isRead) { - return new Mrc14(machInst, rt, (IntRegIndex)miscReg); + return new Mrc14(machInst, rt, (IntRegIndex)miscReg, iss); } else { - return new Mcr14(machInst, (IntRegIndex)miscReg, rt); + return new Mcr14(machInst, (IntRegIndex)miscReg, rt, iss); } } } @@ -123,8 +163,8 @@ def format McrMrc14() {{ let {{ header_output = ''' - StaticInstPtr - decodeMcrMrc15(ExtMachInst machInst); + StaticInstPtr decodeMcrMrc14(ExtMachInst machInst); + StaticInstPtr decodeMcrMrc15(ExtMachInst machInst); ''' decoder_output = ''' StaticInstPtr @@ -136,107 +176,50 @@ let {{ const uint32_t crm = bits(machInst, 3, 0); const MiscRegIndex miscReg = decodeCP15Reg(crn, opc1, crm, opc2); const IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 15, 12); - const bool isRead = bits(machInst, 20); + uint32_t iss = mcrMrcIssBuild(isRead, crm, rt, crn, opc1, opc2); switch (miscReg) { case MISCREG_NOP: return new NopInst(machInst); - case NUM_MISCREGS: + case MISCREG_CP15_UNIMPL: return new FailUnimplemented( csprintf("miscreg crn:%d opc1:%d crm:%d opc2:%d %s unknown", crn, opc1, crm, opc2, isRead ? "read" : "write").c_str(), machInst); - case MISCREG_DCCISW: - return new WarnUnimplemented( - isRead ? "mrc dccisw" : "mcr dcisw", machInst); - case MISCREG_DCCIMVAC: - return new WarnUnimplemented( - isRead ? "mrc dccimvac" : "mcr dccimvac", machInst); - case MISCREG_DCIMVAC: - return new WarnUnimplemented( - isRead ? "mrc dcimvac" : "mcr dcimvac", machInst); case MISCREG_DCCMVAC: return new FlushPipeInst( isRead ? "mrc dccmvac" : "mcr dccmvac", machInst); - case MISCREG_DCCMVAU: - return new WarnUnimplemented( - isRead ? "mrc dccmvau" : "mcr dccmvau", machInst); case MISCREG_CP15ISB: - return new Isb(machInst); + return new Isb(machInst, iss); case MISCREG_CP15DSB: - return new Dsb(machInst); + return new Dsb(machInst, iss); case MISCREG_CP15DMB: - return new Dmb(machInst); - case MISCREG_ICIALLUIS: - return new WarnUnimplemented( - isRead ? "mrc icialluis" : "mcr icialluis", machInst); - case MISCREG_ICIMVAU: - return new WarnUnimplemented( - isRead ? "mrc icimvau" : "mcr icimvau", machInst); - case MISCREG_BPIMVA: - return new WarnUnimplemented( - isRead ? "mrc bpimva" : "mcr bpimva", machInst); - case MISCREG_BPIALLIS: - return new WarnUnimplemented( - isRead ? "mrc bpiallis" : "mcr bpiallis", machInst); - case MISCREG_BPIALL: - return new WarnUnimplemented( - isRead ? "mrc bpiall" : "mcr bpiall", machInst); - case MISCREG_L2LATENCY: - return new WarnUnimplemented( - isRead ? "mrc l2latency" : "mcr l2latency", machInst); - case MISCREG_CRN15: - return new WarnUnimplemented( - isRead ? "mrc crn15" : "mcr crn15", machInst); - - // Write only. - case MISCREG_TLBIALLIS: - case MISCREG_TLBIMVAIS: - case MISCREG_TLBIASIDIS: - case MISCREG_TLBIMVAAIS: - case MISCREG_ITLBIALL: - case MISCREG_ITLBIMVA: - case MISCREG_ITLBIASID: - case MISCREG_DTLBIALL: - case MISCREG_DTLBIMVA: - case MISCREG_DTLBIASID: - case MISCREG_TLBIALL: - case MISCREG_TLBIMVA: - case MISCREG_TLBIASID: - case MISCREG_TLBIMVAA: - if (isRead) { - return new Unknown(machInst); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); - } - - // Read only in user mode. - case MISCREG_TPIDRURO: - if (isRead) { - return new Mrc15User(machInst, rt, (IntRegIndex)miscReg); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); - } - - // Read/write in user mode. 
- case MISCREG_TPIDRURW: - if (isRead) { - return new Mrc15User(machInst, rt, (IntRegIndex)miscReg); - } else { - return new Mcr15User(machInst, (IntRegIndex)miscReg, rt); - } - - // Read/write, priveleged only. + return new Dmb(machInst, iss); default: - if (miscReg >= MISCREG_CP15_UNIMP_START) + if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + isRead ? "mrc" : "mcr", miscRegName[miscReg]); + warn("\\tinstruction '%s' unimplemented\\n", full_mnem); + + // Remove the warn flag and set the implemented flag. This + // prevents the instruction from warning a second time, and it + // also means the instruction is actually generated. Actually + // creating the instruction to access a register that isn't + // implemented sounds a bit silly, but it's required to get + // the correct behaviour for hyp traps and undef exceptions. + miscRegInfo[miscReg][MISCREG_IMPLEMENTED] = true; + miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL] = false; + } + + if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + if (isRead) + return new Mrc15(machInst, rt, (IntRegIndex)miscReg, iss); + return new Mcr15(machInst, (IntRegIndex)miscReg, rt, iss); + } else { return new FailUnimplemented(csprintf("%s %s", isRead ? "mrc" : "mcr", miscRegName[miscReg]).c_str(), machInst); - if (isRead) { - return new Mrc15(machInst, rt, (IntRegIndex)miscReg); - } else { - return new Mcr15(machInst, (IntRegIndex)miscReg, rt); - } } } } @@ -248,3 +231,70 @@ def format McrMrc15() {{ return decodeMcrMrc15(machInst); ''' }}; + +let {{ + header_output = ''' + StaticInstPtr + decodeMcrrMrrc15(ExtMachInst machInst); + ''' + decoder_output = ''' + StaticInstPtr + decodeMcrrMrrc15(ExtMachInst machInst) + { + const uint32_t crm = bits(machInst, 3, 0); + const uint32_t opc1 = bits(machInst, 7, 4); + const MiscRegIndex miscReg = decodeCP15Reg64(crm, opc1); + const IntRegIndex rt = (IntRegIndex) (uint32_t) bits(machInst, 15, 12); + const IntRegIndex rt2 = (IntRegIndex) (uint32_t) bits(machInst, 19, 16); + + const bool isRead = bits(machInst, 20); + + switch (miscReg) { + case MISCREG_CP15_UNIMPL: + return new FailUnimplemented( + csprintf("miscreg crm:%d opc1:%d 64-bit %s unknown", + crm, opc1, isRead ? "read" : "write").c_str(), + machInst); + default: + if (miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL]) { + std::string full_mnem = csprintf("%s %s", + isRead ? "mrrc" : "mcrr", miscRegName[miscReg]); + warn("\\tinstruction '%s' unimplemented\\n", full_mnem); + + // Remove the warn flag and set the implemented flag. This + // prevents the instruction from warning a second time, and it + // also means the instruction is actually generated. Actually + // creating the instruction to access a register that isn't + // implemented sounds a bit silly, but it's required to get + // the correct behaviour for hyp traps and undef exceptions. + miscRegInfo[miscReg][MISCREG_IMPLEMENTED] = true; + miscRegInfo[miscReg][MISCREG_WARN_NOT_FAIL] = false; + } + + if (miscRegInfo[miscReg][MISCREG_IMPLEMENTED]) { + uint32_t iss = mcrrMrrcIssBuild(isRead, crm, rt, rt2, opc1); + + if (isRead) + return new Mrrc15(machInst, (IntRegIndex) miscReg, rt2, rt, iss); + return new Mcrr15(machInst, rt2, rt, (IntRegIndex) miscReg, iss); + } else { + return new FailUnimplemented(csprintf("%s %s", + isRead ?
"mrrc" : "mcrr", miscRegName[miscReg]).c_str(), + machInst); + } + } + } + ''' +}}; + +def format Mcrr15() {{ + decode_block = ''' + return decodeMcrrMrrc15(machInst); + ''' +}}; + +def format Mrrc15() {{ + decode_block = ''' + return decodeMcrrMrrc15(machInst); + ''' +}}; diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa new file mode 100644 index 000000000..72bbd0c60 --- /dev/null +++ b/src/arch/arm/isa/formats/neon64.isa @@ -0,0 +1,2626 @@ +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Giacomo Gabrielli +// Mbou Eyole + +output header {{ +namespace Aarch64 +{ + // AdvSIMD three same + StaticInstPtr decodeNeon3Same(ExtMachInst machInst); + // AdvSIMD three different + StaticInstPtr decodeNeon3Diff(ExtMachInst machInst); + // AdvSIMD two-reg misc + StaticInstPtr decodeNeon2RegMisc(ExtMachInst machInst); + // AdvSIMD across lanes + StaticInstPtr decodeNeonAcrossLanes(ExtMachInst machInst); + // AdvSIMD copy + StaticInstPtr decodeNeonCopy(ExtMachInst machInst); + // AdvSIMD vector x indexed element + StaticInstPtr decodeNeonIndexedElem(ExtMachInst machInst); + // AdvSIMD modified immediate + StaticInstPtr decodeNeonModImm(ExtMachInst machInst); + // AdvSIMD shift by immediate + StaticInstPtr decodeNeonShiftByImm(ExtMachInst machInst); + // AdvSIMD TBL/TBX + StaticInstPtr decodeNeonTblTbx(ExtMachInst machInst); + // AdvSIMD ZIP/UZP/TRN + StaticInstPtr decodeNeonZipUzpTrn(ExtMachInst machInst); + // AdvSIMD EXT + StaticInstPtr decodeNeonExt(ExtMachInst machInst); + + // AdvSIMD scalar three same + StaticInstPtr decodeNeonSc3Same(ExtMachInst machInst); + // AdvSIMD scalar three different + StaticInstPtr decodeNeonSc3Diff(ExtMachInst machInst); + // AdvSIMD scalar two-reg misc + StaticInstPtr decodeNeonSc2RegMisc(ExtMachInst machInst); + // AdvSIMD scalar pairwise + StaticInstPtr decodeNeonScPwise(ExtMachInst machInst); + // AdvSIMD scalar copy + StaticInstPtr decodeNeonScCopy(ExtMachInst machInst); + // AdvSIMD scalar x indexed element + StaticInstPtr decodeNeonScIndexedElem(ExtMachInst machInst); + // AdvSIMD scalar shift by immediate + StaticInstPtr decodeNeonScShiftByImm(ExtMachInst machInst); + + // AdvSIMD load/store + StaticInstPtr decodeNeonMem(ExtMachInst machInst); +} +}}; + +output decoder {{ +namespace Aarch64 +{ + StaticInstPtr + decodeNeon3Same(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + + switch (opcode) { + case 0x00: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x01: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x02: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x03: + switch (size) { + case 0x0: + if (u) { + if (q) + return new EorQX(machInst, vd, vn, vm); + else + return new EorDX(machInst, vd, vn, vm); + } else { + if (q) + return new AndQX(machInst, vd, vn, vm); + else + return new AndDX(machInst, vd, vn, vm); + } + case 0x1: + if (u) { + if (q) + return new BslQX(machInst, vd, vn, vm); + else + return new BslDX(machInst, vd, vn, vm); + } else { + if (q) + return new BicQX(machInst, vd, vn, vm); + else + return new BicDX(machInst, vd, vn, vm); + } + case 0x2: + if (u) { + if (q) + return new BitQX(machInst, vd, vn, vm); + else + return new BitDX(machInst, vd, vn, vm); + 
} else { + if (q) + return new OrrQX(machInst, vd, vn, vm); + else + return new OrrDX(machInst, vd, vn, vm); + } + case 0x3: + if (u) { + if (q) + return new BifQX(machInst, vd, vn, vm); + else + return new BifDX(machInst, vd, vn, vm); + } else { + if (q) + return new OrnQX(machInst, vd, vn, vm); + else + return new OrnDX(machInst, vd, vn, vm); + } + } + case 0x04: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x05: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x06: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x07: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x08: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x09: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x0a: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x0b: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x0c: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x0d: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x0e: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x0f: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x10: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x11: + if (size_q == 0x6) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x12: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + 
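+            // Opcode 0x13: the u==1 forms are PMUL, which is architected
+            // for byte elements only; the u==0 forms go through the
+            // unsigned three-same helper below.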
case 0x13: + if (size == 0x3 || (size != 0x0 && bits(machInst, 29))) + return new Unknown64(machInst); + if (u) { + if (q) + return new PmulQX(machInst, vd, vn, vm); + else + return new PmulDX(machInst, vd, vn, vm); + } else { + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + } + case 0x14: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x15: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x16: + if (size == 0x3 || size == 0x0) + return new Unknown64(machInst); + if (u) { + if (q) + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } else { + if (q) + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } + case 0x17: + if (u || size_q == 0x6) + return new Unknown64(machInst); + else + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x18: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x19: + if (size < 0x2) { + if (u || sz_q == 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u || sz_q == 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1b: + if (size < 0x2 && sz_q != 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } else { + return new Unknown64(machInst); + } + case 0x1c: + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1d: + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } else { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1e: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, 
vm); + } else { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } + case 0x1f: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (u) + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return new Unknown64(machInst); + else + return decodeNeonUThreeFpReg( + q, size & 0x1, machInst, vd, vn, vm); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeon3Diff(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x0: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x1: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x2: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x3: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x4: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x5: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x6: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x7: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x8: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0x9: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } + } + case 0xa: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0xb: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return 
decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } + } + case 0xc: + if (size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeSReg( + q, size, machInst, vd, vn, vm); + else + return decodeNeonSThreeSReg( + q, size, machInst, vd, vn, vm); + case 0xd: + if (u || (size == 0x0 || size == 0x3)) { + return new Unknown64(machInst); + } else { + if (q) { + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } else { + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + } + } + case 0xe: + if (u || size != 0) { + return new Unknown64(machInst); + } else { + if (q) + return new Pmull2X(machInst, vd, vn, vm); + else + return new PmullX(machInst, vd, vn, vm); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeon2RegMisc(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex)(uint8_t)bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex)(uint8_t)bits(machInst, 9, 5); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + uint8_t op = (uint8_t)((bits(machInst, 12) << 1) | + bits(machInst, 29)); + uint8_t switchVal = opcode | ((u ? 1 : 0) << 5); + + switch (switchVal) { + case 0x00: + if (op + size >= 3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x01: + if (op + size >= 3) + return new Unknown64(machInst); + if (q) + return new Rev16QX(machInst, vd, vn); + else + return new Rev16DX(machInst, vd, vn); + case 0x02: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x03: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x04: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x05: + if (size != 0x0) + return new Unknown64(machInst); + if (q) + return new CntQX(machInst, vd, vn); + else + return new CntDX(machInst, vd, vn); + case 0x06: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x07: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x08: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x09: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x0a: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x0b: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x0c: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x0d: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x0e: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x0f: + if (size < 0x2 || sz_q == 0x2) + return 
new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x12: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x14: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x16: + if (size > 0x1) + return new Unknown64(machInst); + if (q) { + if (size) + return new Fcvtn2X(machInst, vd, vn); + else + return new Fcvtn2X(machInst, vd, vn); + } else { + if (size) + return new FcvtnX(machInst, vd, vn); + else + return new FcvtnX(machInst, vd, vn); + } + case 0x17: + if (size > 0x1) + return new Unknown64(machInst); + if (q) { + if (size) + return new Fcvtl2X(machInst, vd, vn); + else + return new Fcvtl2X(machInst, vd, vn); + } else { + if (size) + return new FcvtlX(machInst, vd, vn); + else + return new FcvtlX(machInst, vd, vn); + } + case 0x18: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x19: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x1a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x1b: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x1c: + if (size < 0x2) { + if (sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + } else { + if (size & 0x1) + return new Unknown64(machInst); + if (q) + return new UrecpeQX(machInst, vd, vn); + else + return new UrecpeDX(machInst, vd, vn); + } + case 0x1d: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) { + if (size & 0x1) + return new ScvtfIntDQX(machInst, vd, vn); + else + return new ScvtfIntSQX(machInst, vd, vn); + } else { + if (size & 0x1) + return new Unknown(machInst); + else + return new ScvtfIntDX(machInst, vd, vn); + } + } else { + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + } + case 0x20: + if (op + size >= 3) + return new Unknown64(machInst); + if (q) { + if (size & 0x1) + return new Rev32QX(machInst, vd, vn); + else + return new Rev32QX(machInst, vd, vn); + } else { + if (size & 0x1) + return new Rev32DX(machInst, vd, vn); + else + return new Rev32DX(machInst, vd, vn); + } + case 0x22: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x23: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscXReg( + q, size, machInst, vd, vn); + return new Unknown64(machInst); + case 0x24: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x25: + if (size == 0x0) { + if (q) + return new MvnQX(machInst, vd, vn); + else + return new MvnDX(machInst, vd, vn); + } else if (size == 0x1) { + if (q) + return new RbitQX(machInst, vd, vn); + else + return new RbitDX(machInst, vd, 
vn); + } else { + return new Unknown64(machInst); + } + case 0x26: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x27: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x28: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x29: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x2b: + if (size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonSTwoMiscXReg( + q, size, machInst, vd, vn); + case 0x2c: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x2d: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x2f: + if (size < 0x2 || size_q == 0x6) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x32: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonSTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x33: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x34: + if (size == 0x3) + return new Unknown64(machInst); + return decodeNeonUTwoMiscSReg( + q, size, machInst, vd, vn); + case 0x36: + if (size != 0x1) + return new Unknown64(machInst); + if (q) + return new Fcvtxn2X(machInst, vd, vn); + else + return new FcvtxnX(machInst, vd, vn); + case 0x38: + if (size > 0x1 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x39: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x3a: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x3b: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x3c: + if (size < 0x2) { + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + } else if (size == 0x2) { + if (q) + return new UrsqrteQX(machInst, vd, vn); + else + return new UrsqrteDX(machInst, vd, vn); + } else { + return new Unknown64(machInst); + } + case 0x3d: + if (sz_q == 0x2) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + case 0x3f: + if (size < 0x2 || sz_q == 0x2) + return new Unknown64(machInst); + return decodeNeonUTwoMiscFpReg( + q, size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonAcrossLanes(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + 
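+        // Across-lanes ops reduce all the elements of the source vector
+        // (vn) to a single scalar result in vd; u and opcode are folded
+        // into one switch value below.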
IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t size_q = (size << 1) | q; + uint8_t sz_q = size_q & 0x3; + uint8_t switchVal = opcode | ((u ? 1 : 0) << 5); + + switch (switchVal) { + case 0x03: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesLongReg( + q, size, machInst, vd, vn); + case 0x0a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesReg( + q, size, machInst, vd, vn); + case 0x1a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonSAcrossLanesReg( + q, size, machInst, vd, vn); + case 0x1b: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg( + q, size, machInst, vd, vn); + case 0x23: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesLongReg( + q, size, machInst, vd, vn); + case 0x2a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg( + q, size, machInst, vd, vn); + case 0x2c: + if (sz_q != 0x1) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) + return new FmaxnmvQX(machInst, vd, vn); + else + return new Unknown64(machInst); + } else { + if (q) + return new FminnmvQX(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x2f: + if (sz_q != 0x1) + return new Unknown64(machInst); + if (size < 0x2) { + if (q) + return new FmaxvQX(machInst, vd, vn); + else + return new Unknown64(machInst); + } else { + if (q) + return new FminvQX(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x3a: + if (size_q == 0x4 || size == 0x3) + return new Unknown64(machInst); + return decodeNeonUAcrossLanesReg( + q, size, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonCopy(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op = bits(machInst, 29); + uint8_t imm5 = bits(machInst, 20, 16); + uint8_t imm4 = bits(machInst, 14, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t imm5_pos = findLsbSet(imm5); + uint8_t index1 = 0, index2 = 0; + + if (op) { + if (!q || (imm4 & mask(imm5_pos))) + return new Unknown64(machInst); + + index1 = bits(imm5, 4, imm5_pos + 1); // dst + index2 = bits(imm4, 3, imm5_pos); // src + + switch (imm5_pos) { + case 0: + return new InsElemX(machInst, vd, vn, index1, index2); + case 1: + return new InsElemX(machInst, vd, vn, index1, index2); + case 2: + return new InsElemX(machInst, vd, vn, index1, index2); + case 3: + return new InsElemX(machInst, vd, vn, index1, index2); + default: + return new Unknown64(machInst); + } + } + + switch (imm4) { + case 0x0: + index1 = bits(imm5, 4, imm5_pos + 1); + switch (imm5_pos) { + case 0: + if (q) + return new DupElemQX(machInst, vd, vn, index1); + else + return new DupElemDX(machInst, vd, vn, index1); + case 1: + if (q) + return new DupElemQX(machInst, vd, vn, index1); + else + return new DupElemDX(machInst, vd, vn, index1); + case 2: + if (q) + return new DupElemQX(machInst, vd, vn, index1); + else + return new DupElemDX(machInst, vd, vn, index1); + case 3: + if (q) + return new DupElemQX(machInst, vd, vn, index1); + else + return new Unknown64(machInst); + default: + return new Unknown64(machInst); + } + case 0x1: + switch (imm5) { + case 0x1: + if (q) + return new DupGprWQX(machInst, vd, vn); + else + 
return new DupGprWDX(machInst, vd, vn); + case 0x2: + if (q) + return new DupGprWQX(machInst, vd, vn); + else + return new DupGprWDX(machInst, vd, vn); + case 0x4: + if (q) + return new DupGprWQX(machInst, vd, vn); + else + return new DupGprWDX(machInst, vd, vn); + case 0x8: + if (q) + return new DupGprXQX(machInst, vd, vn); + else + return new Unknown64(machInst); + } + case 0x3: + index1 = imm5 >> (imm5_pos + 1); + switch (imm5_pos) { + case 0: + return new InsGprWX(machInst, vd, vn, index1); + case 1: + return new InsGprWX(machInst, vd, vn, index1); + case 2: + return new InsGprWX(machInst, vd, vn, index1); + case 3: + return new InsGprXX(machInst, vd, vn, index1); + default: + return new Unknown64(machInst); + } + case 0x5: + index1 = bits(imm5, 4, imm5_pos + 1); + switch (imm5_pos) { + case 0: + if (q) + return new SmovXX(machInst, vd, vn, index1); + else + return new SmovWX(machInst, vd, vn, index1); + case 1: + if (q) + return new SmovXX(machInst, vd, vn, index1); + else + return new SmovWX(machInst, vd, vn, index1); + case 2: + if (q) + return new SmovXX(machInst, vd, vn, index1); + else + return new Unknown64(machInst); + default: + return new Unknown64(machInst); + } + case 0x7: + index1 = imm5 >> (imm5_pos + 1); + + if ((q && imm5_pos != 3) || (!q && imm5_pos >= 3)) + return new Unknown64(machInst); + + switch (imm5_pos) { + case 0: + return new UmovWX(machInst, vd, vn, index1); + case 1: + return new UmovWX(machInst, vd, vn, index1); + case 2: + return new UmovWX(machInst, vd, vn, index1); + case 3: + return new UmovXX(machInst, vd, vn, index1); + default: + return new Unknown64(machInst); + } + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonIndexedElem(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t L = bits(machInst, 21); + uint8_t M = bits(machInst, 20); + uint8_t opcode = bits(machInst, 15, 12); + uint8_t H = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm_bf = (IntRegIndex) (uint8_t) bits(machInst, 19, 16); + + uint8_t index = 0; + uint8_t index_fp = 0; + uint8_t vmh = 0; + uint8_t sz = size & 0x1; + uint8_t sz_q = (sz << 1) | bits(machInst, 30); + uint8_t sz_L = (sz << 1) | L; + + // Index and 2nd register operand for integer instructions + if (size == 0x1) { + index = (H << 2) | (L << 1) | M; + // vmh = 0; + } else if (size == 0x2) { + index = (H << 1) | L; + vmh = M; + } + IntRegIndex vm = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + // Index and 2nd register operand for FP instructions + vmh = M; + if ((size & 0x1) == 0) { + index_fp = (H << 1) | L; + } else if (L == 0) { + index_fp = H; + } + IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + switch (opcode) { + case 0x0: + if (!u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0x1: + if (!u && size >= 2 && sz_q != 0x2 && sz_L != 0x3) + return decodeNeonUThreeImmFpReg( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return new Unknown64(machInst); + case 0x2: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0x3: + if (u || (size == 0x0 || 
size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0x4: + if (u && !(size == 0x0 || size == 0x3)) + return decodeNeonUThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + else + return new Unknown64(machInst); + case 0x5: + if (!u && size >= 0x2 && sz_L != 0x3 && sz_q != 0x2) + return decodeNeonUThreeImmFpReg( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return new Unknown64(machInst); + case 0x6: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0x7: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0x8: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0x9: + if (size >= 2 && sz_q != 0x2 && sz_L != 0x3) { + if (u) + return decodeNeonUThreeImmFpReg( + q, sz, machInst, vd, vn, vm_fp, index_fp); + else + return decodeNeonUThreeImmFpReg( + q, sz, machInst, vd, vn, vm_fp, index_fp); + } else { + return new Unknown64(machInst); + } + case 0xa: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0xb: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0xc: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + case 0xd: + if (u || (size == 0x0 || size == 0x3)) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + q, size, machInst, vd, vn, vm, index); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonModImm(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op = bits(machInst, 29); + uint8_t abcdefgh = (bits(machInst, 18, 16) << 5) | + bits(machInst, 9, 5); + uint8_t cmode = bits(machInst, 15, 12); + uint8_t o2 = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + + if (o2 == 0x1 || (op == 0x1 && cmode == 0xf && !q)) + return new Unknown64(machInst); + + bool immValid = true; + const uint64_t bigImm = simd_modified_imm(op, cmode, abcdefgh, + immValid, + true /* isAarch64 */); + if (!immValid) { + return new Unknown(machInst); + } + + if (op) { + if (bits(cmode, 3) == 0) { + if (bits(cmode, 0) == 0) { + if (q) + return new MvniQX(machInst, vd, bigImm); + else + return new MvniDX(machInst, vd, bigImm); + } else { + if (q) + return new BicImmQX(machInst, vd, bigImm); + else + return new BicImmDX(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 2) == 1) { + switch (bits(cmode, 1, 0)) { + case 0: + case 1: + if (q) + return new MvniQX(machInst, vd, bigImm); + else + return new MvniDX(machInst, vd, bigImm); + case 2: + if (q) + return new MoviQX(machInst, vd, bigImm); + else + return new MoviDX(machInst, vd, bigImm); + case 3: + if (q) + return new FmovQX(machInst, vd, bigImm); + else + return new MoviDX(machInst, vd, bigImm); + } + } 
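+                // cmode<3:2> == 0b10: halfword (16-bit) immediate forms;
+                // cmode<0> picks MVNI (0) or BIC (1), and cmode<1> selects
+                // a left shift of 0 or 8 for the abcdefgh byte.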
else { + if (bits(cmode, 0) == 0) { + if (q) + return new MvniQX(machInst, vd, bigImm); + else + return new MvniDX(machInst, vd, bigImm); + } else { + if (q) + return new BicImmQX(machInst, vd, + bigImm); + else + return new BicImmDX(machInst, vd, + bigImm); + } + } + } + } else { + if (bits(cmode, 3) == 0) { + if (bits(cmode, 0) == 0) { + if (q) + return new MoviQX(machInst, vd, bigImm); + else + return new MoviDX(machInst, vd, bigImm); + } else { + if (q) + return new OrrImmQX(machInst, vd, bigImm); + else + return new OrrImmDX(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 2) == 1) { + if (bits(cmode, 1, 0) == 0x3) { + if (q) + return new FmovQX(machInst, vd, bigImm); + else + return new FmovDX(machInst, vd, bigImm); + } else { + if (q) + return new MoviQX(machInst, vd, bigImm); + else + return new MoviDX(machInst, vd, bigImm); + } + } else { + if (bits(cmode, 0) == 0) { + if (q) + return new MoviQX(machInst, vd, bigImm); + else + return new MoviDX(machInst, vd, bigImm); + } else { + if (q) + return new OrrImmQX(machInst, vd, + bigImm); + else + return new OrrImmDX(machInst, vd, bigImm); + } + } + } + } + return new Unknown(machInst); + } + + StaticInstPtr + decodeNeonShiftByImm(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t u = bits(machInst, 29); + uint8_t immh = bits(machInst, 22, 19); + uint8_t immb = bits(machInst, 18, 16); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t immh3 = bits(machInst, 22); + uint8_t immh3_q = (immh3 << 1) | q; + uint8_t op_u = (bits(machInst, 12) << 1) | u; + uint8_t size = findMsbSet(immh); + int shiftAmt = 0; + + switch (opcode) { + case 0x00: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x02: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x04: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x06: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x08: + if (u && !(immh3_q == 0x2)) { + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0a: + if (immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x0c: + if (u && !(immh3_q == 0x2 || op_u == 0x0)) { + shiftAmt = ((immh << 3) | immb) - (8 << size); + return 
decodeNeonSTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0e: + if (immh3_q == 0x2 || op_u == 0x0) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftXReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x10: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonSTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x11: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonSTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x12: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x13: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x14: + if (immh3) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftSReg( + q, size, machInst, vd, vn, shiftAmt); + case 0x1c: + if (immh < 0x4 || immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) { + return decodeNeonUTwoShiftFpReg( + q, size & 0x1, machInst, vd, vn, shiftAmt); + } else { + if (q) { + if (size & 0x1) + return new ScvtfFixedDQX(machInst, vd, vn, + shiftAmt); + else + return new ScvtfFixedSQX(machInst, vd, vn, + shiftAmt); + } else { + if (size & 0x1) + return new Unknown(machInst); + else + return new ScvtfFixedDX(machInst, vd, vn, + shiftAmt); + } + } + case 0x1f: + if (immh < 0x4 || immh3_q == 0x2) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftFpReg( + q, size & 0x1, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftFpReg( + q, size & 0x1, machInst, vd, vn, shiftAmt); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonTblTbx(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t switchVal = bits(machInst, 14, 12); + + switch (switchVal) { + case 0x0: + if (q) + return new Tbl1QX(machInst, vd, vn, vm); + else + return new Tbl1DX(machInst, vd, vn, vm); + case 0x1: + if (q) + return new Tbx1QX(machInst, vd, vn, vm); + else + return new Tbx1DX(machInst, vd, vn, vm); + case 0x2: + if (q) + return new Tbl2QX(machInst, vd, vn, vm); + else + return new Tbl2DX(machInst, vd, vn, vm); + case 0x3: + if (q) + return new Tbx2QX(machInst, vd, vn, vm); + else + return new Tbx2DX(machInst, vd, vn, vm); + case 0x4: + if (q) + return new 
Tbl3QX(machInst, vd, vn, vm); + else + return new Tbl3DX(machInst, vd, vn, vm); + case 0x5: + if (q) + return new Tbx3QX(machInst, vd, vn, vm); + else + return new Tbx3DX(machInst, vd, vn, vm); + case 0x6: + if (q) + return new Tbl4QX(machInst, vd, vn, vm); + else + return new Tbl4DX(machInst, vd, vn, vm); + case 0x7: + if (q) + return new Tbx4QX(machInst, vd, vn, vm); + else + return new Tbx4DX(machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + + return new Unknown64(machInst); + } + + StaticInstPtr + decodeNeonZipUzpTrn(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 14, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x1: + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x2: + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x3: + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x5: + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x6: + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + case 0x7: + return decodeNeonUThreeXReg( + q, size, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + return new Unknown64(machInst); + } + + StaticInstPtr + decodeNeonExt(ExtMachInst machInst) + { + uint8_t q = bits(machInst, 30); + uint8_t op2 = bits(machInst, 23, 22); + uint8_t imm4 = bits(machInst, 14, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (op2 != 0 || (q == 0x0 && bits(imm4, 3) == 0x1)) + return new Unknown64(machInst); + + uint8_t index = q ? 
imm4 : imm4 & 0x7; + + if (q) { + return new ExtQX(machInst, vd, vn, vm, index); + } else { + return new ExtDX(machInst, vd, vn, vm, index); + } + } + + StaticInstPtr + decodeNeonSc3Same(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 15, 11); + uint8_t s = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x01: + if (u) + return decodeNeonUThreeUReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg( + size, machInst, vd, vn, vm); + case 0x05: + if (u) + return decodeNeonUThreeUReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg( + size, machInst, vd, vn, vm); + case 0x06: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmhiDX(machInst, vd, vn, vm); + else + return new CmgtDX(machInst, vd, vn, vm); + case 0x07: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmhsDX(machInst, vd, vn, vm); + else + return new CmgeDX(machInst, vd, vn, vm); + case 0x08: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return new UshlDX(machInst, vd, vn, vm); + else + return new SshlDX(machInst, vd, vn, vm); + case 0x09: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeUReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg( + size, machInst, vd, vn, vm); + case 0x0a: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return new UrshlDX(machInst, vd, vn, vm); + else + return new SrshlDX(machInst, vd, vn, vm); + case 0x0b: + if (!s && size != 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeUReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeUReg( + size, machInst, vd, vn, vm); + case 0x10: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new SubDX(machInst, vd, vn, vm); + else + return new AddDX(machInst, vd, vn, vm); + case 0x11: + if (size != 0x3) + return new Unknown64(machInst); + if (u) + return new CmeqDX(machInst, vd, vn, vm); + else + return new CmtstDX(machInst, vd, vn, vm); + case 0x16: + if (size == 0x3 || size == 0x0) + return new Unknown64(machInst); + if (u) + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + else + return decodeNeonSThreeHAndWReg( + size, machInst, vd, vn, vm); + case 0x1a: + if (!u || size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + case 0x1b: + if (u || size > 0x1) + return new Unknown64(machInst); + else + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + case 0x1c: + if (size < 0x2) { + if (u) + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + } else { + if (u) + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + else + return new Unknown64(machInst); + } + case 0x1d: + if (!u) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + else + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + case 0x1f: + if (u) + return new Unknown64(machInst); + if (size < 0x2) + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, 
vm); + else + return decodeNeonUThreeScFpReg( + size & 0x1, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonSc3Diff(ExtMachInst machInst) + { + if (bits(machInst, 29)) + return new Unknown64(machInst); + + uint8_t size = bits(machInst, 23, 22); + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + + uint8_t opcode = bits(machInst, 15, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + switch (opcode) { + case 0x9: + return decodeNeonSThreeHAndWReg(size, machInst, vd, vn, vm); + case 0xb: + return decodeNeonSThreeHAndWReg(size, machInst, vd, vn, vm); + case 0xd: + return decodeNeonSThreeHAndWReg(size, machInst, vd, vn, vm); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonSc2RegMisc(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t switchVal = opcode | ((u ? 1 : 0) << 5); + switch (switchVal) { + case 0x03: + return decodeNeonUTwoMiscUReg(size, machInst, vd, vn); + case 0x07: + return decodeNeonSTwoMiscUReg(size, machInst, vd, vn); + case 0x08: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmgtZeroDX(machInst, vd, vn); + case 0x09: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmeqZeroDX(machInst, vd, vn); + case 0x0a: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmltZeroDX(machInst, vd, vn); + case 0x0b: + if (size != 0x3) + return new Unknown64(machInst); + else + return new AbsDX(machInst, vd, vn); + case 0x0c: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x0d: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x0e: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x14: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new SqxtnScX(machInst, vd, vn); + case 0x1: + return new SqxtnScX(machInst, vd, vn); + case 0x2: + return new SqxtnScX(machInst, vd, vn); + } + } + case 0x1a: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x1b: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x1c: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return new Unknown64(machInst); + case 0x1d: + if (size < 0x2) { + if (size & 0x1) + return new ScvtfIntScDX(machInst, vd, vn); + else + return new ScvtfIntScSX(machInst, vd, vn); + } else { + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + } + case 0x1f: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x23: + return decodeNeonUTwoMiscUReg(size, machInst, vd, vn); + case 0x27: + return 
decodeNeonSTwoMiscUReg(size, machInst, vd, vn); + case 0x28: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmgeZeroDX(machInst, vd, vn); + case 0x29: + if (size != 0x3) + return new Unknown64(machInst); + else + return new CmleZeroDX(machInst, vd, vn); + case 0x2b: + if (size != 0x3) + return new Unknown64(machInst); + else + return new NegDX(machInst, vd, vn); + case 0x2c: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x2d: + if (size < 0x2) + return new Unknown64(machInst); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x32: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new SqxtunScX(machInst, vd, vn); + case 0x1: + return new SqxtunScX(machInst, vd, vn); + case 0x2: + return new SqxtunScX(machInst, vd, vn); + } + } + case 0x34: + if (size == 0x3) { + return new Unknown64(machInst); + } else { + switch (size) { + case 0x0: + return new UqxtnScX(machInst, vd, vn); + case 0x1: + return new UqxtnScX(machInst, vd, vn); + case 0x2: + return new UqxtnScX(machInst, vd, vn); + } + } + case 0x36: + if (size != 0x1) { + return new Unknown64(machInst); + } else { + return new FcvtxnScX(machInst, vd, vn); + } + case 0x3a: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x3b: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + case 0x3c: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return new Unknown64(machInst); + case 0x3d: + if (size < 0x2) + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + else + return decodeNeonUTwoMiscScFpReg( + size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScPwise(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t opcode = bits(machInst, 16, 12); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + if (!u) { + if (opcode == 0x1b && size == 0x3) + return new AddpScQX(machInst, vd, vn); + else + return new Unknown64(machInst); + } + + uint8_t switchVal = (opcode << 0) | (size << 5); + switch (switchVal) { + case 0x0c: + case 0x2c: + return decodeNeonUTwoMiscPwiseScFpReg( + size & 0x1, machInst, vd, vn); + case 0x0d: + case 0x2d: + return decodeNeonUTwoMiscPwiseScFpReg( + size & 0x1, machInst, vd, vn); + case 0x0f: + case 0x2f: + return decodeNeonUTwoMiscPwiseScFpReg( + size & 0x1, machInst, vd, vn); + case 0x4c: + case 0x6c: + return decodeNeonUTwoMiscPwiseScFpReg( + size & 0x1, machInst, vd, vn); + case 0x4f: + case 0x6f: + return decodeNeonUTwoMiscPwiseScFpReg( + size & 0x1, machInst, vd, vn); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScCopy(ExtMachInst machInst) + { + if (bits(machInst, 14, 11) != 0 || bits(machInst, 29)) + return new Unknown64(machInst); + + uint8_t imm5 = bits(machInst, 20, 16); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t size = findLsbSet(imm5); + uint8_t index = bits(imm5, 4, size + 1); + + return 
decodeNeonUTwoShiftUReg( + size, machInst, vd, vn, index); + } + + StaticInstPtr + decodeNeonScIndexedElem(ExtMachInst machInst) + { + uint8_t u = bits(machInst, 29); + uint8_t size = bits(machInst, 23, 22); + uint8_t L = bits(machInst, 21); + uint8_t M = bits(machInst, 20); + uint8_t opcode = bits(machInst, 15, 12); + uint8_t H = bits(machInst, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex vm_bf = (IntRegIndex) (uint8_t) bits(machInst, 19, 16); + + uint8_t index = 0; + uint8_t index_fp = 0; + uint8_t vmh = 0; + uint8_t sz_L = bits(machInst, 22, 21); + + // Index and 2nd register operand for integer instructions + if (size == 0x1) { + index = (H << 2) | (L << 1) | M; + // vmh = 0; + } else if (size == 0x2) { + index = (H << 1) | L; + vmh = M; + } else if (size == 0x3) { + index = H; + vmh = M; + } + IntRegIndex vm = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + // Index and 2nd register operand for FP instructions + vmh = M; + if ((size & 0x1) == 0) { + index_fp = (H << 1) | L; + } else if (L == 0) { + index_fp = H; + } + IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf); + + if (u && opcode != 9) + return new Unknown64(machInst); + + switch (opcode) { + case 0x1: + if (size < 2 || sz_L == 0x3) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmScFpReg( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + case 0x3: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + size, machInst, vd, vn, vm, index); + case 0x5: + if (size < 2 || sz_L == 0x3) + return new Unknown64(machInst); + else + return decodeNeonUThreeImmScFpReg( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + case 0x7: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + size, machInst, vd, vn, vm, index); + case 0x9: + if (size < 2 || sz_L == 0x3) + return new Unknown64(machInst); + if (u) + return decodeNeonUThreeImmScFpReg( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + else + return decodeNeonUThreeImmScFpReg( + size & 0x1, machInst, vd, vn, vm_fp, index_fp); + case 0xb: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + size, machInst, vd, vn, vm, index); + case 0xc: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + size, machInst, vd, vn, vm, index); + case 0xd: + if (size == 0x0 || size == 0x3) + return new Unknown64(machInst); + else + return decodeNeonSThreeImmHAndWReg( + size, machInst, vd, vn, vm, index); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonScShiftByImm(ExtMachInst machInst) + { + bool u = bits(machInst, 29); + uint8_t immh = bits(machInst, 22, 19); + uint8_t immb = bits(machInst, 18, 16); + uint8_t opcode = bits(machInst, 15, 11); + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + + uint8_t immh3 = bits(machInst, 22); + uint8_t size = findMsbSet(immh); + int shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + + if (immh == 0x0) + return new Unknown64(machInst); + + switch (opcode) { + case 0x00: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UshrDX(machInst, vd, vn, shiftAmt); + else + return new SshrDX(machInst, vd, vn, 
shiftAmt); + case 0x02: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UsraDX(machInst, vd, vn, shiftAmt); + else + return new SsraDX(machInst, vd, vn, shiftAmt); + case 0x04: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UrshrDX(machInst, vd, vn, shiftAmt); + else + return new SrshrDX(machInst, vd, vn, shiftAmt); + case 0x06: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new UrsraDX(machInst, vd, vn, shiftAmt); + else + return new SrsraDX(machInst, vd, vn, shiftAmt); + case 0x08: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return new SriDX(machInst, vd, vn, shiftAmt); + else + return new Unknown64(machInst); + case 0x0a: + if (!immh3) + return new Unknown64(machInst); + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return new SliDX(machInst, vd, vn, shiftAmt); + else + return new ShlDX(machInst, vd, vn, shiftAmt); + case 0x0c: + if (u) { + shiftAmt = ((immh << 3) | immb) - (8 << size); + return decodeNeonSTwoShiftUReg( + size, machInst, vd, vn, shiftAmt); + } else { + return new Unknown64(machInst); + } + case 0x0e: + shiftAmt = ((immh << 3) | immb) - (8 << size); + if (u) + return decodeNeonUTwoShiftUReg( + size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftUReg( + size, machInst, vd, vn, shiftAmt); + case 0x10: + if (!u || immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonSTwoShiftUSReg( + size, machInst, vd, vn, shiftAmt); + case 0x11: + if (!u || immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + return decodeNeonSTwoShiftUSReg( + size, machInst, vd, vn, shiftAmt); + case 0x12: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftUSReg( + size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftUSReg( + size, machInst, vd, vn, shiftAmt); + case 0x13: + if (immh3) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftUSReg( + size, machInst, vd, vn, shiftAmt); + else + return decodeNeonSTwoShiftUSReg( + size, machInst, vd, vn, shiftAmt); + case 0x1c: + if (immh < 0x4) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) { + return decodeNeonUTwoShiftUFpReg( + size & 0x1, machInst, vd, vn, shiftAmt); + } else { + if (size & 0x1) + return new ScvtfFixedScDX(machInst, vd, vn, + shiftAmt); + else + return new ScvtfFixedScSX(machInst, vd, vn, + shiftAmt); + } + case 0x1f: + if (immh < 0x4) + return new Unknown64(machInst); + shiftAmt = (8 << (size + 1)) - ((immh << 3) | immb); + if (u) + return decodeNeonUTwoShiftUFpReg( + size & 0x1, machInst, vd, vn, shiftAmt); + else + return decodeNeonUTwoShiftUFpReg( + size & 0x1, machInst, vd, vn, shiftAmt); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeNeonMem(ExtMachInst machInst) + { + uint8_t dataSize = bits(machInst, 30) ? 
128 : 64; + bool multiple = bits(machInst, 24, 23) < 0x2; + bool load = bits(machInst, 22); + + uint8_t numStructElems = 0; + uint8_t numRegs = 0; + + if (multiple) { // AdvSIMD load/store multiple structures + uint8_t opcode = bits(machInst, 15, 12); + uint8_t eSize = bits(machInst, 11, 10); + bool wb = !(bits(machInst, 20, 16) == 0x0 && !bits(machInst, 23)); + + switch (opcode) { + case 0x0: // LD/ST4 (4 regs) + numStructElems = 4; + numRegs = 4; + break; + case 0x2: // LD/ST1 (4 regs) + numStructElems = 1; + numRegs = 4; + break; + case 0x4: // LD/ST3 (3 regs) + numStructElems = 3; + numRegs = 3; + break; + case 0x6: // LD/ST1 (3 regs) + numStructElems = 1; + numRegs = 3; + break; + case 0x7: // LD/ST1 (1 reg) + numStructElems = 1; + numRegs = 1; + break; + case 0x8: // LD/ST2 (2 regs) + numStructElems = 2; + numRegs = 2; + break; + case 0xa: // LD/ST1 (2 regs) + numStructElems = 1; + numRegs = 2; + break; + default: + return new Unknown64(machInst); + } + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (load) { + return new VldMult64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, numRegs, wb); + } else { + return new VstMult64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, numRegs, wb); + } + } else { // AdvSIMD load/store single structure + uint8_t scale = bits(machInst, 15, 14); + uint8_t numStructElems = (((uint8_t) bits(machInst, 13) << 1) | + (uint8_t) bits(machInst, 21)) + 1; + uint8_t index = 0; + bool wb = !(bits(machInst, 20, 16) == 0x0 && !bits(machInst, 23)); + bool replicate = false; + + switch (scale) { + case 0x0: + index = ((uint8_t) bits(machInst, 30) << 3) | + ((uint8_t) bits(machInst, 12) << 2) | + (uint8_t) bits(machInst, 11, 10); + break; + case 0x1: + index = ((uint8_t) bits(machInst, 30) << 2) | + ((uint8_t) bits(machInst, 12) << 1) | + (uint8_t) bits(machInst, 11); + break; + case 0x2: + if (bits(machInst, 10) == 0x0) { + index = ((uint8_t) bits(machInst, 30) << 1) | + bits(machInst, 12); + } else { + index = (uint8_t) bits(machInst, 30); + scale = 0x3; + } + break; + case 0x3: + scale = bits(machInst, 11, 10); + replicate = true; + break; + default: + return new Unknown64(machInst); + } + + uint8_t eSize = scale; + + IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0); + IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5); + IntRegIndex rm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16); + + if (load) { + return new VldSingle64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, index, wb, replicate); + } else { + return new VstSingle64(machInst, rn, vd, rm, eSize, dataSize, + numStructElems, index, wb, replicate); + } + } + } +} +}}; diff --git a/src/arch/arm/isa/formats/uncond.isa b/src/arch/arm/isa/formats/uncond.isa index 4a18a55bb..c376cd9ce 100644 --- a/src/arch/arm/isa/formats/uncond.isa +++ b/src/arch/arm/isa/formats/uncond.isa @@ -99,11 +99,11 @@ def format ArmUnconditional() {{ case 0x1: return new Clrex(machInst); case 0x4: - return new Dsb(machInst); + return new Dsb(machInst, 0); case 0x5: - return new Dmb(machInst); + return new Dmb(machInst, 0); case 0x6: - return new Isb(machInst); + return new Isb(machInst, 0); } } } else if (bits(op2, 0) == 0) { @@ -166,7 +166,7 @@ def format ArmUnconditional() {{ const uint32_t val = ((machInst >> 20) & 0x5); if (val == 0x4) { const uint32_t mode = bits(machInst, 4, 0); - if (badMode((OperatingMode)mode)) + if 
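The opcode-to-register-count mapping used by the multiple-structures decode above can be read off directly; as a standalone table (illustrative helper, not gem5 code):

    #include <cstdint>

    // Map the AdvSIMD "load/store multiple structures" opcode to
    // (elements per structure, registers transferred).
    bool ldstMultRegs(uint8_t opcode, int &elems, int &regs)
    {
        switch (opcode) {
          case 0x0: elems = 4; regs = 4; return true;   // LD/ST4
          case 0x2: elems = 1; regs = 4; return true;   // LD/ST1, 4 regs
          case 0x4: elems = 3; regs = 3; return true;   // LD/ST3
          case 0x6: elems = 1; regs = 3; return true;   // LD/ST1, 3 regs
          case 0x7: elems = 1; regs = 1; return true;   // LD/ST1, 1 reg
          case 0x8: elems = 2; regs = 2; return true;   // LD/ST2
          case 0xa: elems = 1; regs = 2; return true;   // LD/ST1, 2 regs
          default:  return false;                       // UNDEFINED
        }
    }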
(badMode32((OperatingMode)mode)) return new Unknown(machInst); switch (bits(machInst, 24, 21)) { case 0x2: @@ -250,17 +250,10 @@ def format ArmUnconditional() {{ "ldc, ldc2 (immediate)", machInst); } } - if (op1 == 0xC5) { - return new WarnUnimplemented( - "mrrc, mrrc2", machInst); - } } else { if (bits(op1, 4, 3) != 0 || bits(op1, 1) == 1) { return new WarnUnimplemented( "stc, stc2", machInst); - } else if (op1 == 0xC4) { - return new WarnUnimplemented( - "mcrr, mcrrc", machInst); } } } diff --git a/src/arch/arm/isa/formats/unimp.isa b/src/arch/arm/isa/formats/unimp.isa index 1c9a4b402..8e346112c 100644 --- a/src/arch/arm/isa/formats/unimp.isa +++ b/src/arch/arm/isa/formats/unimp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -85,6 +85,9 @@ output header {{ private: /// Have we warned on this instruction yet? mutable bool warned; + /// Full mnemonic for MRC and MCR instructions including the + /// coproc. register name + std::string fullMnemonic; public: /// Constructor @@ -96,6 +99,16 @@ output header {{ flags[IsNonSpeculative] = true; } + WarnUnimplemented(const char *_mnemonic, ExtMachInst _machInst, + const std::string& _fullMnemonic) + : ArmStaticInst(_mnemonic, _machInst, No_OpClass), warned(false), + fullMnemonic(_fullMnemonic) + { + // don't call execute() (which panics) if we're on a + // speculative path + flags[IsNonSpeculative] = true; + } + %(BasicExecDeclare)s std::string @@ -147,10 +160,7 @@ output exec {{ FailUnimplemented::execute(%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(machInst, false, mnemonic); + return new UndefinedInstruction(machInst, false, mnemonic); } Fault @@ -158,7 +168,8 @@ output exec {{ Trace::InstRecord *traceData) const { if (!warned) { - warn("\tinstruction '%s' unimplemented\n", mnemonic); + warn("\tinstruction '%s' unimplemented\n", + fullMnemonic.size() ? 
fullMnemonic.c_str() : mnemonic); warned = true; } diff --git a/src/arch/arm/isa/includes.isa b/src/arch/arm/isa/includes.isa index 5dd13d623..a2ce84345 100644 --- a/src/arch/arm/isa/includes.isa +++ b/src/arch/arm/isa/includes.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -50,10 +50,16 @@ output header {{ #include <sstream> #include "arch/arm/insts/branch.hh" +#include "arch/arm/insts/branch64.hh" +#include "arch/arm/insts/data64.hh" +#include "arch/arm/insts/fplib.hh" #include "arch/arm/insts/macromem.hh" #include "arch/arm/insts/mem.hh" +#include "arch/arm/insts/mem64.hh" #include "arch/arm/insts/misc.hh" +#include "arch/arm/insts/misc64.hh" #include "arch/arm/insts/mult.hh" +#include "arch/arm/insts/neon64_mem.hh" #include "arch/arm/insts/pred_inst.hh" #include "arch/arm/insts/static_inst.hh" #include "arch/arm/insts/vfp.hh" @@ -63,6 +69,7 @@ output header {{ }}; output decoder {{ +#include <string> #include "arch/arm/decoder.hh" #include "arch/arm/faults.hh" #include "arch/arm/intregs.hh" diff --git a/src/arch/arm/isa/insts/aarch64.isa b/src/arch/arm/isa/insts/aarch64.isa new file mode 100644 index 000000000..6fcf9b5d2 --- /dev/null +++ b/src/arch/arm/isa/insts/aarch64.isa @@ -0,0 +1,58 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
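The WarnUnimplemented change above keeps a per-instance "warned" flag and prefers the coprocessor-qualified mnemonic when one was supplied. A minimal sketch of the same warn-once pattern (simplified; not the gem5 class):

    #include <cstdio>
    #include <string>
    #include <utility>

    class WarnOnce {
        mutable bool warned = false;
        std::string mnemonic, fullMnemonic;   // fullMnemonic may be empty
      public:
        WarnOnce(std::string m, std::string full = "")
            : mnemonic(std::move(m)), fullMnemonic(std::move(full)) {}
        void execute() const {
            if (!warned) {
                // Print the most specific name available, once.
                std::fprintf(stderr, "instruction '%s' unimplemented\n",
                             fullMnemonic.empty() ? mnemonic.c_str()
                                                  : fullMnemonic.c_str());
                warned = true;
            }
        }
    };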
+// +// Authors: Gabe Black + +let {{ + movzCode = 'Dest64 = ((uint64_t)imm1) << imm2;' + movzIop = InstObjParams("movz", "Movz", "RegImmImmOp", movzCode, []) + header_output += RegImmImmOpDeclare.subst(movzIop) + decoder_output += RegImmImmOpConstructor.subst(movzIop) + exec_output += BasicExecute.subst(movzIop) + + movkCode = 'Dest64 = insertBits(Dest64, imm2 + 15, imm2, imm1);' + movkIop = InstObjParams("movk", "Movk", "RegImmImmOp", movkCode, []) + header_output += RegImmImmOpDeclare.subst(movkIop) + decoder_output += RegImmImmOpConstructor.subst(movkIop) + exec_output += BasicExecute.subst(movkIop) + + movnCode = 'Dest64 = ~(((uint64_t)imm1) << imm2);' + movnIop = InstObjParams("movn", "Movn", "RegImmImmOp", movnCode, []) + header_output += RegImmImmOpDeclare.subst(movnIop) + decoder_output += RegImmImmOpConstructor.subst(movnIop) + exec_output += BasicExecute.subst(movnIop) +}}; diff --git a/src/arch/arm/isa/insts/branch.isa b/src/arch/arm/isa/insts/branch.isa index e360f4581..3ee9d88e4 100644 --- a/src/arch/arm/isa/insts/branch.isa +++ b/src/arch/arm/isa/insts/branch.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -48,7 +48,7 @@ let {{ bCode = ''' NPC = (uint32_t)(PC + imm); ''' - br_tgt_code = '''pcs.instNPC(branchPC.instPC() + imm);''' + br_tgt_code = '''pcs.instNPC((uint32_t)(branchPC.instPC() + imm));''' instFlags = ["IsDirectControl"] if (link): bCode += ''' @@ -86,9 +86,9 @@ let {{ Name += "Imm" # Since we're switching ISAs, the target ISA will be the opposite # of the current ISA. Thumb is whether the target is ARM. - newPC = '(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))' + newPC = '(uint32_t)(Thumb ? (roundDown(PC, 4) + imm) : (PC + imm))' br_tgt_code = ''' - pcs.instNPC((branchPC.thumb() ? (roundDown(branchPC.instPC(),4) + imm) : + pcs.instNPC((uint32_t)(branchPC.thumb() ? (roundDown(branchPC.instPC(),4) + imm) : (branchPC.instPC() + imm))); ''' base = "BranchImmCond" @@ -150,7 +150,26 @@ let {{ if imm: decoder_output += BranchTarget.subst(blxIop) - #Ignore BXJ for now + bxjcode = ''' + HSTR hstr = Hstr; + CPSR cpsr = Cpsr; + SCR scr = Scr; + + if (ArmSystem::haveVirtualization(xc->tcBase()) && hstr.tjdbx && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + fault = new HypervisorTrap(machInst, op1, EC_TRAPPED_BXJ); + } + IWNPC = Op1; + ''' + + bxjIop = InstObjParams("bxj", "BxjReg", "BranchRegCond", + {"code": bxjcode, + "predicate_test": predicateTest, + "is_ras_pop": "op1 == INTREG_LR" }, + ["IsIndirectControl"]) + header_output += BranchRegCondDeclare.subst(bxjIop) + decoder_output += BranchRegCondConstructor.subst(bxjIop) + exec_output += PredOpExecute.subst(bxjIop) #CBNZ, CBZ. These are always unconditional as far as predicates for (mnem, test) in (("cbz", "=="), ("cbnz", "!=")): diff --git a/src/arch/arm/isa/insts/branch64.isa b/src/arch/arm/isa/insts/branch64.isa new file mode 100644 index 000000000..89cee6c22 --- /dev/null +++ b/src/arch/arm/isa/insts/branch64.isa @@ -0,0 +1,248 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. 
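The three wide-move forms defined in aarch64.isa above are plain bit operations on a 16-bit payload (imm1) at a 16-bit-aligned shift (imm2); in plain C++ (imm1/imm2 naming follows the diff; illustrative only):

    #include <cstdint>

    uint64_t movz(uint16_t imm1, unsigned imm2)   // zero, then insert
    { return (uint64_t)imm1 << imm2; }

    uint64_t movn(uint16_t imm1, unsigned imm2)   // inverted MOVZ
    { return ~((uint64_t)imm1 << imm2); }

    uint64_t movk(uint64_t dest, uint16_t imm1, unsigned imm2)
    {
        uint64_t mask = 0xffffULL << imm2;        // replace one 16-bit field
        return (dest & ~mask) | ((uint64_t)imm1 << imm2);
    }
    // movk(movz(0x1234, 16), 0x5678, 0) == 0x12345678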
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black +// Giacomo Gabrielli + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # B, BL + for (mnem, link) in (("b", False), ("bl", True)): + bCode = ('NPC = purifyTaggedAddr(RawPC + imm, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsDirectControl', 'IsUncondControl'] + if (link): + bCode += 'XLR = RawPC + 4;\n' + instFlags += ['IsCall'] + + bIop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImm64", bCode, instFlags) + header_output += BranchImm64Declare.subst(bIop) + decoder_output += BranchImm64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # BR, BLR + for (mnem, link) in (("br", False), ("blr", True)): + bCode = ('NPC = purifyTaggedAddr(XOp1, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsIndirectControl', 'IsUncondControl'] + if (link): + bCode += 'XLR = RawPC + 4;\n' + instFlags += ['IsCall'] + + bIop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchReg64", bCode, instFlags) + header_output += BranchReg64Declare.subst(bIop) + decoder_output += BranchReg64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # B conditional + bCode = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) + NPC = purifyTaggedAddr(RawPC + imm, xc->tcBase(), + currEL(xc->tcBase())); + else + NPC = NPC; + ''' + bIop = InstObjParams("b", "BCond64", "BranchImmCond64", bCode, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmCond64Declare.subst(bIop) + decoder_output += BranchImmCond64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # RET + bCode = ('NPC = purifyTaggedAddr(XOp1, xc->tcBase(), ' + 'currEL(xc->tcBase()));\n') + instFlags = ['IsIndirectControl', 'IsUncondControl', 
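Every AArch64 branch target above passes through purifyTaggedAddr, which strips the tag byte from a 64-bit virtual address when top-byte-ignore applies. A rough standalone model (assumption: a fixed 56-bit VA split; the real gem5 helper consults the translation regime and exception level):

    #include <cstdint>

    uint64_t purifyTagged(uint64_t addr)
    {
        const int kVaBits = 56;                   // illustrative split point
        // Replace the tag byte with a sign extension of bit 55.
        int64_t shifted = (int64_t)(addr << (64 - kVaBits));
        return (uint64_t)(shifted >> (64 - kVaBits));
    }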
'IsReturn'] + + bIop = InstObjParams('ret', 'Ret64', "BranchRet64", bCode, instFlags) + header_output += BranchReg64Declare.subst(bIop) + decoder_output += BranchReg64Constructor.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # ERET + bCode = '''Addr newPc; + CPSR cpsr = Cpsr; + CPSR spsr = Spsr; + + ExceptionLevel curr_el = opModeToEL((OperatingMode) (uint8_t) cpsr.mode); + switch (curr_el) { + case EL3: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL3); + break; + case EL2: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL2); + break; + case EL1: + newPc = xc->tcBase()->readMiscReg(MISCREG_ELR_EL1); + break; + default: + return new UndefinedInstruction(machInst, false, mnemonic); + break; + } + if (spsr.width && (newPc & mask(2))) { + // To avoid PC Alignment fault when returning to AArch32 + if (spsr.t) + newPc = newPc & ~mask(1); + else + newPc = newPc & ~mask(2); + } + spsr.q = 0; + spsr.it1 = 0; + spsr.j = 0; + spsr.res0_23_22 = 0; + spsr.ge = 0; + spsr.it2 = 0; + spsr.t = 0; + + OperatingMode mode = (OperatingMode) (uint8_t) spsr.mode; + bool illegal = false; + ExceptionLevel target_el; + if (badMode(mode)) { + illegal = true; + } else { + target_el = opModeToEL(mode); + if (((target_el == EL2) && + !ArmSystem::haveVirtualization(xc->tcBase())) || + (target_el > curr_el) || + (spsr.width == 1)) { + illegal = true; + } else { + bool known = true; + bool from32 = (spsr.width == 1); + bool to32 = false; + if (false) { // TODO: !haveAArch32EL + to32 = false; + } else if (!ArmSystem::highestELIs64(xc->tcBase())) { + to32 = true; + } else { + bool scr_rw, hcr_rw; + if (ArmSystem::haveSecurity(xc->tcBase())) { + SCR scr = xc->tcBase()->readMiscReg(MISCREG_SCR_EL3); + scr_rw = scr.rw; + } else { + scr_rw = true; + } + + if (ArmSystem::haveVirtualization(xc->tcBase())) { + HCR hcr = xc->tcBase()->readMiscReg(MISCREG_HCR_EL2); + hcr_rw = hcr.rw; + } else { + hcr_rw = scr_rw; + } + + switch (target_el) { + case EL3: + to32 = false; + break; + case EL2: + to32 = !scr_rw; + break; + case EL1: + to32 = !scr_rw || !hcr_rw; + break; + case EL0: + if (curr_el == EL0) { + to32 = cpsr.width; + } else if (!scr_rw || !hcr_rw) { + // EL0 using AArch32 if EL1 using AArch32 + to32 = true; + } else { + known = false; + to32 = false; + } + } + } + if (known) + illegal = (from32 != to32); + } + } + + if (illegal) { + uint8_t old_mode = cpsr.mode; + spsr.mode = old_mode; // Preserve old mode when invalid + spsr.il = 1; + } else { + if (cpsr.width != spsr.width) + panic("AArch32/AArch64 interprocessing not supported yet"); + } + Cpsr = spsr; + + CondCodesNZ = spsr.nz; + CondCodesC = spsr.c; + CondCodesV = spsr.v; + NPC = purifyTaggedAddr(newPc, xc->tcBase(), + opModeToEL((OperatingMode) (uint8_t) spsr.mode)); + LLSCLock = 0; // Clear exclusive monitor + SevMailbox = 1; //Set Event Register + ''' + instFlags = ['IsSerializeAfter', 'IsNonSpeculative', 'IsSquashAfter'] + bIop = InstObjParams('eret', 'Eret64', "BranchEret64", bCode, instFlags) + header_output += BasicDeclare.subst(bIop) + decoder_output += BasicConstructor64.subst(bIop) + exec_output += BasicExecute.subst(bIop) + + # CBNZ, CBZ + for (mnem, test) in (("cbz", "=="), ("cbnz", "!=")): + code = ('NPC = (Op164 %(test)s 0) ? 
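The ERET body above reduces to a small legality predicate: the SPSR-selected mode must be implemented, the target EL may not exceed the current one, and the register width must match what the target EL will run in. Condensed (illustrative; the "targetValid" flag folds in the bad-mode and missing-EL2 checks):

    enum EL { EL0, EL1, EL2, EL3 };

    bool illegalReturn(EL curEl, EL targetEl, bool targetValid,
                       bool from32, bool to32)
    {
        if (!targetValid)            // SPSR.M encodes an unimplemented mode
            return true;
        if (targetEl > curEl)        // an exception return cannot raise the EL
            return true;
        return from32 != to32;       // AArch32/AArch64 width mismatch
    }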
' + 'purifyTaggedAddr(RawPC + imm, xc->tcBase(), ' + 'currEL(xc->tcBase())) : NPC;\n') + code = code % {"test": test} + iop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImmReg64", code, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmReg64Declare.subst(iop) + decoder_output += BranchImmReg64Constructor.subst(iop) + exec_output += BasicExecute.subst(iop) + + # TBNZ, TBZ + for (mnem, test) in (("tbz", "=="), ("tbnz", "!=")): + code = ('NPC = ((Op164 & imm1) %(test)s 0) ? ' + 'purifyTaggedAddr(RawPC + imm2, xc->tcBase(), ' + 'currEL(xc->tcBase())) : NPC;\n') + code = code % {"test": test} + iop = InstObjParams(mnem, mnem.capitalize() + "64", + "BranchImmImmReg64", code, + ['IsCondControl', 'IsDirectControl']) + header_output += BranchImmImmReg64Declare.subst(iop) + decoder_output += BranchImmImmReg64Constructor.subst(iop) + exec_output += BasicExecute.subst(iop) +}}; diff --git a/src/arch/arm/isa/insts/data.isa b/src/arch/arm/isa/insts/data.isa index be56554b0..881676496 100644 --- a/src/arch/arm/isa/insts/data.isa +++ b/src/arch/arm/isa/insts/data.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -257,7 +257,8 @@ let {{ CPSR old_cpsr = Cpsr; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Spsr, 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; diff --git a/src/arch/arm/isa/insts/data64.isa b/src/arch/arm/isa/insts/data64.isa new file mode 100644 index 000000000..77d7541ca --- /dev/null +++ b/src/arch/arm/isa/insts/data64.isa @@ -0,0 +1,465 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
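The compare-and-branch and test-and-branch generators that close branch64.isa amount to four predicates; restated in plain C++ (shown with a bit index where the decoder above actually stores a one-bit mask in imm1; illustrative):

    #include <cstdint>

    bool cbzTaken(uint64_t op)  { return op == 0; }
    bool cbnzTaken(uint64_t op) { return op != 0; }
    bool tbzTaken(uint64_t op, unsigned bit)  { return ((op >> bit) & 1) == 0; }
    bool tbnzTaken(uint64_t op, unsigned bit) { return ((op >> bit) & 1) != 0; }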
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + def createCcCode64(carry, overflow): + code = "" + code += ''' + uint16_t _iz, _in; + _in = bits(resTemp, intWidth - 1); + _iz = ((resTemp & mask(intWidth)) == 0); + CondCodesNZ = (_in << 1) | _iz; + DPRINTF(Arm, "(in, iz) = (%%d, %%d)\\n", _in, _iz); + ''' + if overflow and overflow != "none": + code += ''' + uint16_t _iv; + _iv = %s & 1; + CondCodesV = _iv; + DPRINTF(Arm, "(iv) = (%%d)\\n", _iv); + ''' % overflow + if carry and carry != "none": + code += ''' + uint16_t _ic; + _ic = %s & 1; + CondCodesC = _ic; + DPRINTF(Arm, "(ic) = (%%d)\\n", _ic); + ''' % carry + return code + + oldC = 'CondCodesC' + oldV = 'CondCodesV' + # Dicts of ways to set the carry flag. + carryCode64 = { + "none": "none", + "add": 'findCarry(intWidth, resTemp, Op164, secOp)', + "sub": 'findCarry(intWidth, resTemp, Op164, ~secOp)', + "logic": '0' + } + # Dict of ways to set the overflow flag. + overflowCode64 = { + "none": "none", + "add": 'findOverflow(intWidth, resTemp, Op164, secOp)', + "sub": 'findOverflow(intWidth, resTemp, Op164, ~secOp)', + "logic": '0' + } + + immOp2 = "uint64_t secOp M5_VAR_USED = imm;" + sRegOp2 = "uint64_t secOp M5_VAR_USED = " + \ + "shiftReg64(Op264, shiftAmt, shiftType, intWidth);" + eRegOp2 = "uint64_t secOp M5_VAR_USED = " + \ + "extendReg64(Op264, extendType, shiftAmt, intWidth);" + + def buildDataWork(mnem, code, flagType, suffix, buildCc, buildNonCc, + base, templateBase): + code = ''' + uint64_t resTemp M5_VAR_USED = 0; + ''' + code + ccCode = createCcCode64(carryCode64[flagType], overflowCode64[flagType]) + Name = mnem.capitalize() + suffix + iop = InstObjParams(mnem, Name, base, code) + iopCc = InstObjParams(mnem + "s", Name + "Cc", base, code + ccCode) + + def subst(iop): + global header_output, decoder_output, exec_output + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + if buildNonCc: + subst(iop) + if buildCc: + subst(iopCc) + + def buildXImmDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XImm"): + buildDataWork(mnem, immOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXImmOp", "DataXImm") + + def buildXSRegDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XSReg"): + buildDataWork(mnem, sRegOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXSRegOp", "DataXSReg") + + def buildXERegDataInst(mnem, code, flagType = "logic", \ + buildCc = True, buildNonCc = True, \ + suffix = "XEReg"): + buildDataWork(mnem, eRegOp2 + code, flagType, suffix, + buildCc, buildNonCc, "DataXERegOp", "DataXEReg") + + def buildDataInst(mnem, code, flagType = "logic", + buildCc = True, buildNonCc = True): + buildXImmDataInst(mnem, code, flagType, buildCc, buildNonCc) + buildXSRegDataInst(mnem, code, flagType, buildCc, buildNonCc) + 
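createCcCode64 above splits N/Z from C/V so logical ops can force the latter to zero; the add-form carry and overflow tests behave like the following standalone checks (findCarry/findOverflow are gem5 helpers; these are assumed equivalents, with res already truncated arithmetic of a + b):

    #include <cstdint>

    bool addCarry(unsigned width, uint64_t res, uint64_t a)
    {
        // Carry out iff the truncated sum wrapped below an operand.
        uint64_t mask = width == 64 ? ~0ULL : (1ULL << width) - 1;
        return (res & mask) < (a & mask);
    }

    bool addOverflow(unsigned width, uint64_t res, uint64_t a, uint64_t b)
    {
        // Signed overflow: operands share a sign that differs from the result.
        uint64_t sign = 1ULL << (width - 1);
        return !((a ^ b) & sign) && ((a ^ res) & sign);
    }
    // Subtraction reuses the same tests with b replaced by ~b, matching the
    // 'sub' entries in the carry/overflow dictionaries above.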
buildXERegDataInst(mnem, code, flagType, buildCc, buildNonCc) + + buildXImmDataInst("adr", "Dest64 = RawPC + imm", buildCc = False); + buildXImmDataInst("adrp", "Dest64 = (RawPC & ~mask(12)) + imm", + buildCc = False); + buildDataInst("and", "Dest64 = resTemp = Op164 & secOp;") + buildDataInst("eor", "Dest64 = Op164 ^ secOp;", buildCc = False) + buildXSRegDataInst("eon", "Dest64 = Op164 ^ ~secOp;", buildCc = False) + buildDataInst("sub", "Dest64 = resTemp = Op164 - secOp;", "sub") + buildDataInst("add", "Dest64 = resTemp = Op164 + secOp;", "add") + buildXSRegDataInst("adc", + "Dest64 = resTemp = Op164 + secOp + %s;" % oldC, "add") + buildXSRegDataInst("sbc", + "Dest64 = resTemp = Op164 - secOp - !%s;" % oldC, "sub") + buildDataInst("orr", "Dest64 = Op164 | secOp;", buildCc = False) + buildXSRegDataInst("orn", "Dest64 = Op164 | ~secOp;", buildCc = False) + buildXSRegDataInst("bic", "Dest64 = resTemp = Op164 & ~secOp;") + + def buildDataXImmInst(mnem, code, optArgs = []): + global header_output, decoder_output, exec_output + classNamePrefix = mnem[0].upper() + mnem[1:] + templateBase = "DataXImm" + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", code, optArgs) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + def buildDataXRegInst(mnem, regOps, code, optArgs = [], + overrideOpClass=None): + global header_output, decoder_output, exec_output + templateBase = "DataX%dReg" % regOps + classNamePrefix = mnem[0].upper() + mnem[1:] + if overrideOpClass: + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", + { 'code': code, 'op_class': overrideOpClass}, + optArgs) + else: + iop = InstObjParams(mnem, classNamePrefix + "64", + templateBase + "Op", code, optArgs) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + buildDataXRegInst("madd", 3, "Dest64 = Op164 + Op264 * Op364", + overrideOpClass="IntMultOp") + buildDataXRegInst("msub", 3, "Dest64 = Op164 - Op264 * Op364", + overrideOpClass="IntMultOp") + buildDataXRegInst("smaddl", 3, + "XDest = XOp1 + sext<32>(WOp2) * sext<32>(WOp3)", + overrideOpClass="IntMultOp") + buildDataXRegInst("smsubl", 3, + "XDest = XOp1 - sext<32>(WOp2) * sext<32>(WOp3)", + overrideOpClass="IntMultOp") + buildDataXRegInst("smulh", 2, ''' + uint64_t op1H = (int32_t)(XOp1 >> 32); + uint64_t op1L = (uint32_t)XOp1; + uint64_t op2H = (int32_t)(XOp2 >> 32); + uint64_t op2L = (uint32_t)XOp2; + uint64_t mid1 = ((op1L * op2L) >> 32) + op1H * op2L; + uint64_t mid2 = op1L * op2H; + uint64_t result = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32; + result += shiftReg64(mid1, 32, ASR, intWidth); + result += shiftReg64(mid2, 32, ASR, intWidth); + XDest = result + op1H * op2H; + ''', overrideOpClass="IntMultOp") + buildDataXRegInst("umaddl", 3, "XDest = XOp1 + WOp2 * WOp3", + overrideOpClass="IntMultOp") + buildDataXRegInst("umsubl", 3, "XDest = XOp1 - WOp2 * WOp3", + overrideOpClass="IntMultOp") + buildDataXRegInst("umulh", 2, ''' + uint64_t op1H = (uint32_t)(XOp1 >> 32); + uint64_t op1L = (uint32_t)XOp1; + uint64_t op2H = (uint32_t)(XOp2 >> 32); + uint64_t op2L = (uint32_t)XOp2; + uint64_t mid1 = ((op1L * op2L) >> 32) + op1H * op2L; + uint64_t mid2 = op1L * op2H; + uint64_t result = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32; + result += mid1 >> 32; + result += mid2 >> 32; + XDest = result + op1H 
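The smulh/umulh bodies above assemble the high 64 bits of a 128-bit product from 32-bit partial products. A compact standalone version of the unsigned case (same schoolbook decomposition; illustrative):

    #include <cstdint>

    uint64_t umulh(uint64_t a, uint64_t b)
    {
        uint64_t aH = a >> 32, aL = (uint32_t)a;
        uint64_t bH = b >> 32, bL = (uint32_t)b;
        uint64_t mid1 = ((aL * bL) >> 32) + aH * bL;   // cannot overflow
        uint64_t mid2 = aL * bH;
        // Carry from summing the two middle terms' low halves.
        uint64_t carry = ((uint64_t)(uint32_t)mid1 + (uint32_t)mid2) >> 32;
        return aH * bH + (mid1 >> 32) + (mid2 >> 32) + carry;
    }
    // e.g. umulh(~0ULL, ~0ULL) == 0xfffffffffffffffe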
* op2H; + ''', overrideOpClass="IntMultOp") + + buildDataXRegInst("asrv", 2, + "Dest64 = shiftReg64(Op164, Op264, ASR, intWidth)") + buildDataXRegInst("lslv", 2, + "Dest64 = shiftReg64(Op164, Op264, LSL, intWidth)") + buildDataXRegInst("lsrv", 2, + "Dest64 = shiftReg64(Op164, Op264, LSR, intWidth)") + buildDataXRegInst("rorv", 2, + "Dest64 = shiftReg64(Op164, Op264, ROR, intWidth)") + buildDataXRegInst("sdiv", 2, ''' + int64_t op1 = Op164; + int64_t op2 = Op264; + if (intWidth == 32) { + op1 = sext<32>(op1); + op2 = sext<32>(op2); + } + Dest64 = op2 == -1 ? -op1 : op2 ? op1 / op2 : 0; + ''', overrideOpClass="IntDivOp") + buildDataXRegInst("udiv", 2, "Dest64 = Op264 ? Op164 / Op264 : 0", + overrideOpClass="IntDivOp") + + buildDataXRegInst("cls", 1, ''' + uint64_t op1 = Op164; + if (bits(op1, intWidth - 1)) + op1 ^= mask(intWidth); + Dest64 = (op1 == 0) ? intWidth - 1 : (intWidth - 2 - findMsbSet(op1)); + ''') + buildDataXRegInst("clz", 1, ''' + Dest64 = (Op164 == 0) ? intWidth : (intWidth - 1 - findMsbSet(Op164)); + ''') + buildDataXRegInst("rbit", 1, ''' + uint64_t result = Op164; + uint64_t lBit = 1ULL << (intWidth - 1); + uint64_t rBit = 1ULL; + while (lBit > rBit) { + uint64_t maskBits = lBit | rBit; + uint64_t testBits = result & maskBits; + // If these bits are different, swap them by toggling them. + if (testBits && testBits != maskBits) + result ^= maskBits; + lBit >>= 1; rBit <<= 1; + } + Dest64 = result; + ''') + buildDataXRegInst("rev", 1, ''' + if (intWidth == 32) + Dest64 = betole<uint32_t>(Op164); + else + Dest64 = betole<uint64_t>(Op164); + ''') + buildDataXRegInst("rev16", 1, ''' + int count = intWidth / 16; + uint64_t result = 0; + for (unsigned i = 0; i < count; i++) { + uint16_t hw = Op164 >> (i * 16); + result |= (uint64_t)betole(hw) << (i * 16); + } + Dest64 = result; + ''') + buildDataXRegInst("rev32", 1, ''' + int count = intWidth / 32; + uint64_t result = 0; + for (unsigned i = 0; i < count; i++) { + uint32_t hw = Op164 >> (i * 32); + result |= (uint64_t)betole(hw) << (i * 32); + } + Dest64 = result; + ''') + + msrMrs64EnabledCheckCode = ''' + // Check for read/write access right + if (!can%sAArch64SysReg(flat_idx, Scr64, cpsr, xc->tcBase())) { + if (flat_idx == MISCREG_DAIF || + flat_idx == MISCREG_DC_ZVA_Xt || + flat_idx == MISCREG_DC_CVAC_Xt || + flat_idx == MISCREG_DC_CIVAC_Xt + ) + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + return new UndefinedInstruction(machInst, false, mnemonic); + } + + // Check for traps to supervisor (FP/SIMD regs) + if (el <= EL1 && msrMrs64TrapToSup(flat_idx, el, Cpacr64)) + return new SupervisorTrap(machInst, 0x1E00000, EC_TRAPPED_SIMD_FP); + + bool is_vfp_neon = false; + + // Check for traps to hypervisor + if ((ArmSystem::haveVirtualization(xc->tcBase()) && el <= EL2) && + msrMrs64TrapToHyp(flat_idx, %s, CptrEl264, Hcr64, &is_vfp_neon)) { + return new HypervisorTrap(machInst, is_vfp_neon ? 0x1E00000 : imm, + is_vfp_neon ? EC_TRAPPED_SIMD_FP : EC_TRAPPED_MSR_MRS_64); + } + + // Check for traps to secure monitor + if ((ArmSystem::haveSecurity(xc->tcBase()) && el <= EL3) && + msrMrs64TrapToMon(flat_idx, CptrEl364, el, &is_vfp_neon)) { + return new SecureMonitorTrap(machInst, + is_vfp_neon ? 0x1E00000 : imm, + is_vfp_neon ? 
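The rbit loop above toggles mismatched bit pairs from the outside in; this equivalent standalone routine (illustrative) shows the same idea:

    #include <cstdint>

    uint64_t reverseBits(uint64_t v, unsigned width)
    {
        uint64_t lo = 1ULL, hi = 1ULL << (width - 1);
        while (hi > lo) {
            uint64_t pair = lo | hi;
            uint64_t picked = v & pair;
            if (picked && picked != pair)   // bits differ: swap by toggling
                v ^= pair;
            lo <<= 1; hi >>= 1;
        }
        return v;
    }
    // reverseBits(0b1011, 4) == 0b1101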
EC_TRAPPED_SIMD_FP : EC_TRAPPED_MSR_MRS_64); + } + ''' + + buildDataXImmInst("mrs", ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()-> + flattenMiscIndex(op1); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + %s + XDest = MiscOp1_ud; + ''' % (msrMrs64EnabledCheckCode % ('Read', 'true'),), + ["IsSerializeBefore"]) + + buildDataXRegInst("mrsNZCV", 1, ''' + CPSR cpsr = 0; + cpsr.nz = CondCodesNZ; + cpsr.c = CondCodesC; + cpsr.v = CondCodesV; + XDest = cpsr; + ''') + + buildDataXImmInst("msr", ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()-> + flattenMiscIndex(dest); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + %s + MiscDest_ud = XOp1; + ''' % (msrMrs64EnabledCheckCode % ('Write', 'false'),), + ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXRegInst("msrNZCV", 1, ''' + CPSR cpsr = XOp1; + CondCodesNZ = cpsr.nz; + CondCodesC = cpsr.c; + CondCodesV = cpsr.v; + ''') + + msrdczva_ea_code = ''' + MiscRegIndex flat_idx = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest); + CPSR cpsr = Cpsr; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + ''' + + msrdczva_ea_code += msrMrs64EnabledCheckCode % ('Write', 'false') + msrdczva_ea_code += ''' + Request::Flags memAccessFlags = Request::CACHE_BLOCK_ZERO|ArmISA::TLB::MustBeOne; + EA = XBase; + assert(!(Dczid & 0x10)); + uint64_t op_size = power(2, Dczid + 2); + EA &= ~(op_size - 1); + + ''' + + msrDCZVAIop = InstObjParams("dczva", "Dczva", "SysDC64", + { "ea_code" : msrdczva_ea_code, + "memacc_code" : ";", "use_uops" : 0, + "op_wb" : ";", "fa_code" : ";"}, ['IsStore', 'IsMemRef']); + header_output += DCStore64Declare.subst(msrDCZVAIop); + decoder_output += DCStore64Constructor.subst(msrDCZVAIop); + exec_output += DCStore64Execute.subst(msrDCZVAIop); + exec_output += DCStore64InitiateAcc.subst(msrDCZVAIop); + exec_output += Store64CompleteAcc.subst(msrDCZVAIop); + + + + buildDataXImmInst("msrSP", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + MiscDest_ud = imm; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXImmInst("msrDAIFSet", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + } + CPSR cpsr = Cpsr; + cpsr.daif = cpsr.daif | imm; + Cpsr = cpsr; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + buildDataXImmInst("msrDAIFClr", ''' + if (!canWriteAArch64SysReg( + (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest), + Scr64, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, 0, EC_TRAPPED_MSR_MRS_64); + } + CPSR cpsr = Cpsr; + cpsr.daif = cpsr.daif & ~imm; + Cpsr = cpsr; + ''', optArgs = ["IsSerializeAfter", "IsNonSpeculative"]) + + def buildDataXCompInst(mnem, instType, suffix, code): + global header_output, decoder_output, exec_output + templateBase = "DataXCond%s" % instType + iop = InstObjParams(mnem, mnem.capitalize() + suffix + "64", + templateBase + "Op", code) + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + def buildDataXCondImmInst(mnem, code): + buildDataXCompInst(mnem, "CompImm", "Imm", code) + def buildDataXCondRegInst(mnem, code): + buildDataXCompInst(mnem, "CompReg", "Reg", code) + def 
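The DC ZVA effective-address code above reads DCZID as log2 of the block size in words, so the byte size is 2^(Dczid+2), and the target address is aligned down to that block (the assert rejects DCZID.DZP, which prohibits the operation). A sketch of the same math (assumed, standalone):

    #include <cstdint>

    uint64_t dcZvaBase(uint64_t xt, uint8_t dczid)
    {
        uint64_t blockBytes = 1ULL << (dczid + 2);   // power(2, Dczid + 2)
        return xt & ~(blockBytes - 1);               // align down to the block
    }
    // With DCZID = 4 (64-byte blocks), dcZvaBase(0x1234, 4) == 0x1200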
buildDataXCondSelInst(mnem, code): + buildDataXCompInst(mnem, "Sel", "", code) + + def condCompCode(flagType, op, imm): + ccCode = createCcCode64(carryCode64[flagType], overflowCode64[flagType]) + opDecl = "uint64_t secOp M5_VAR_USED = imm;" + if not imm: + opDecl = "uint64_t secOp M5_VAR_USED = Op264;" + return opDecl + ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + uint64_t resTemp = Op164 ''' + op + ''' secOp; + ''' + ccCode + ''' + } else { + CondCodesNZ = (defCc >> 2) & 0x3; + CondCodesC = (defCc >> 1) & 0x1; + CondCodesV = defCc & 0x1; + } + ''' + + buildDataXCondImmInst("ccmn", condCompCode("add", "+", True)) + buildDataXCondImmInst("ccmp", condCompCode("sub", "-", True)) + buildDataXCondRegInst("ccmn", condCompCode("add", "+", False)) + buildDataXCondRegInst("ccmp", condCompCode("sub", "-", False)) + + condSelCode = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + Dest64 = Op164; + } else { + Dest64 = %(altVal)s; + } + ''' + buildDataXCondSelInst("csel", condSelCode % {"altVal" : "Op264"}) + buildDataXCondSelInst("csinc", condSelCode % {"altVal" : "Op264 + 1"}) + buildDataXCondSelInst("csinv", condSelCode % {"altVal" : "~Op264"}) + buildDataXCondSelInst("csneg", condSelCode % {"altVal" : "-Op264"}) +}}; diff --git a/src/arch/arm/isa/insts/div.isa b/src/arch/arm/isa/insts/div.isa index 1ff6ef9e4..0896ea94f 100644 --- a/src/arch/arm/isa/insts/div.isa +++ b/src/arch/arm/isa/insts/div.isa @@ -40,12 +40,6 @@ let {{ sdivCode = ''' if (Op2_sw == 0) { - if (((SCTLR)Sctlr).dz) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); - } Dest_sw = 0; } else if (Op1_sw == INT_MIN && Op2_sw == -1) { Dest_sw = INT_MIN; @@ -63,12 +57,6 @@ let {{ udivCode = ''' if (Op2_uw == 0) { - if (((SCTLR)Sctlr).dz) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); - } Dest_uw = 0; } else { Dest_uw = Op1_uw / Op2_uw; diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa index b701995f4..60f030c3d 100644 --- a/src/arch/arm/isa/insts/fp.isa +++ b/src/arch/arm/isa/insts/fp.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -191,14 +191,17 @@ let {{ decoder_output = "" exec_output = "" - vmsrIop = InstObjParams("vmsr", "Vmsr", "FpRegRegOp", - { "code": vmsrEnabledCheckCode + \ - "MiscDest = Op1;", + vmsrCode = vmsrEnabledCheckCode + ''' + MiscDest = Op1; + ''' + + vmsrIop = InstObjParams("vmsr", "Vmsr", "FpRegRegImmOp", + { "code": vmsrCode, "predicate_test": predicateTest, "op_class": "SimdFloatMiscOp" }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += FpRegRegOpDeclare.subst(vmsrIop); - decoder_output += FpRegRegOpConstructor.subst(vmsrIop); + header_output += FpRegRegImmOpDeclare.subst(vmsrIop); + decoder_output += FpRegRegImmOpConstructor.subst(vmsrIop); exec_output += PredOpExecute.subst(vmsrIop); vmsrFpscrCode = vmsrEnabledCheckCode + ''' @@ -215,14 +218,36 @@ let {{ decoder_output += FpRegRegOpConstructor.subst(vmsrFpscrIop); exec_output += PredOpExecute.subst(vmsrFpscrIop); - vmrsIop = InstObjParams("vmrs", "Vmrs", "FpRegRegOp", - { "code": vmrsEnabledCheckCode + \ - "Dest = MiscOp1;", + vmrsCode = vmrsEnabledCheckCode + ''' + CPSR cpsr = Cpsr; + SCR scr = Scr; + if (!inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP)) { + HCR hcr = 
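The conditional-compare generators above either run the compare or substitute the packed defCc immediate (N:Z, then C, then V, matching the unpacking in condCompCode). An illustrative 32/64-bit CCMP:

    #include <cstdint>

    struct Flags { bool n, z, c, v; };

    Flags ccmp(bool condHolds, uint64_t a, uint64_t b, uint8_t nzcv,
               unsigned width)
    {
        if (!condHolds) {       // take the immediate flags instead
            return { bool(nzcv & 8), bool(nzcv & 4),
                     bool(nzcv & 2), bool(nzcv & 1) };
        }
        uint64_t mask = width == 64 ? ~0ULL : (1ULL << width) - 1;
        uint64_t res = (a - b) & mask;
        uint64_t sign = 1ULL << (width - 1);
        Flags f;
        f.n = res & sign;
        f.z = res == 0;
        f.c = (a & mask) >= (b & mask);              // no borrow
        f.v = ((a ^ b) & (a ^ res) & sign) != 0;     // signed overflow
        return f;
    }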
Hcr; + bool hypTrap = false; + switch(xc->tcBase()->flattenMiscIndex(op1)) { + case MISCREG_FPSID: + hypTrap = hcr.tid0; + break; + case MISCREG_MVFR0: + case MISCREG_MVFR1: + hypTrap = hcr.tid3; + break; + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP10_MRC_VMRS); + } + } + Dest = MiscOp1; + ''' + + vmrsIop = InstObjParams("vmrs", "Vmrs", "FpRegRegImmOp", + { "code": vmrsCode, "predicate_test": predicateTest, "op_class": "SimdFloatMiscOp" }, ["IsSerializeBefore"]) - header_output += FpRegRegOpDeclare.subst(vmrsIop); - decoder_output += FpRegRegOpConstructor.subst(vmrsIop); + header_output += FpRegRegImmOpDeclare.subst(vmrsIop); + decoder_output += FpRegRegImmOpConstructor.subst(vmrsIop); exec_output += PredOpExecute.subst(vmrsIop); vmrsFpscrIop = InstObjParams("vmrs", "VmrsFpscr", "FpRegRegOp", @@ -323,7 +348,7 @@ let {{ decoder_output += FpRegRegOpConstructor.subst(vmovRegQIop); exec_output += PredOpExecute.subst(vmovRegQIop); - vmovCoreRegBCode = vfpEnabledCheckCode + ''' + vmovCoreRegBCode = simdEnabledCheckCode + ''' FpDest_uw = insertBits(FpDest_uw, imm * 8 + 7, imm * 8, Op1_ub); ''' vmovCoreRegBIop = InstObjParams("vmov", "VmovCoreRegB", "FpRegRegImmOp", @@ -334,7 +359,7 @@ let {{ decoder_output += FpRegRegImmOpConstructor.subst(vmovCoreRegBIop); exec_output += PredOpExecute.subst(vmovCoreRegBIop); - vmovCoreRegHCode = vfpEnabledCheckCode + ''' + vmovCoreRegHCode = simdEnabledCheckCode + ''' FpDest_uw = insertBits(FpDest_uw, imm * 16 + 15, imm * 16, Op1_uh); ''' vmovCoreRegHIop = InstObjParams("vmov", "VmovCoreRegH", "FpRegRegImmOp", @@ -453,6 +478,17 @@ let {{ singleCode = singleSimpleCode + ''' FpscrExc = fpscr; ''' + singleTernOp = vfpEnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + VfpSavedState state = prepFpState(fpscr.rMode); + float cOp1 = FpOp1; + float cOp2 = FpOp2; + float cOp3 = FpDestP0; + FpDestP0 = ternaryOp(fpscr, %(palam)s, %(op)s, + fpscr.fz, fpscr.dn, fpscr.rMode); + finishVfp(fpscr, state, fpscr.fz); + FpscrExc = fpscr; + ''' singleBinOp = "binaryOp(fpscr, FpOp1, FpOp2," + \ "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)" singleUnaryOp = "unaryOp(fpscr, FpOp1, %(func)s, fpscr.fz, fpscr.rMode)" @@ -463,6 +499,19 @@ let {{ FpDestP1_uw = dblHi(dest); FpscrExc = fpscr; ''' + doubleTernOp = vfpEnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + VfpSavedState state = prepFpState(fpscr.rMode); + double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw); + double cOp2 = dbl(FpOp2P0_uw, FpOp2P1_uw); + double cOp3 = dbl(FpDestP0_uw, FpDestP1_uw); + double cDest = ternaryOp(fpscr, %(palam)s, %(op)s, + fpscr.fz, fpscr.dn, fpscr.rMode); + FpDestP0_uw = dblLow(cDest); + FpDestP1_uw = dblHi(cDest); + finishVfp(fpscr, state, fpscr.fz); + FpscrExc = fpscr; + ''' doubleBinOp = ''' binaryOp(fpscr, dbl(FpOp1P0_uw, FpOp1P1_uw), dbl(FpOp2P0_uw, FpOp2P1_uw), @@ -473,6 +522,37 @@ let {{ fpscr.fz, fpscr.rMode) ''' + def buildTernaryFpOp(Name, base, opClass, singleOp, doubleOp, paramStr): + global header_output, decoder_output, exec_output + + code = singleTernOp % { "op": singleOp, "palam": paramStr } + sIop = InstObjParams(Name.lower() + "s", Name + "S", base, + { "code": code, + "predicate_test": predicateTest, + "op_class": opClass }, []) + code = doubleTernOp % { "op": doubleOp, "palam": paramStr } + dIop = InstObjParams(Name.lower() + "d", Name + "D", base, + { "code": code, + "predicate_test": predicateTest, + "op_class": opClass }, []) + + declareTempl = eval(base + "Declare"); + constructorTempl = eval(base + "Constructor"); + + for iop in sIop, 
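The ternary helper defined above is instantiated four times (see the buildTernaryFpOp calls that follow), and the fused ops differ only in the signs fed to fpMulAdd. Modeled with std::fma (illustrative; ignores FPSCR state):

    #include <cmath>

    // d is the accumulator (cOp3 above), n and m the multiplicands.
    double vfma (double d, double n, double m) { return std::fma( n, m,  d); }
    double vfms (double d, double n, double m) { return std::fma(-n, m,  d); }
    double vfnma(double d, double n, double m) { return std::fma(-n, m, -d); }
    double vfnms(double d, double n, double m) { return std::fma( n, m, -d); }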
dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += PredOpExecute.subst(iop) + + buildTernaryFpOp("Vfma", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd", "fpMulAdd", " cOp1, cOp2, cOp3" ) + buildTernaryFpOp("Vfms", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd", "fpMulAdd", "-cOp1, cOp2, cOp3" ) + buildTernaryFpOp("Vfnma", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd", "fpMulAdd", "-cOp1, cOp2, -cOp3" ) + buildTernaryFpOp("Vfnms", "FpRegRegRegOp", "SimdFloatMultAccOp", + "fpMulAdd", "fpMulAdd", " cOp1, cOp2, -cOp3" ) + def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp): global header_output, decoder_output, exec_output @@ -830,7 +910,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); vfpFlushToZero(fpscr, FpOp1); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, 0, false); + FpDest_uw = vfpFpToFixed(FpOp1, false, 32, 0, false); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -849,7 +929,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, false, false, 0, false); + uint64_t result = vfpFpToFixed(cOp1, false, 32, 0, false); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -868,7 +948,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); vfpFlushToZero(fpscr, FpOp1); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, 0, false); + FpDest_sw = vfpFpToFixed(FpOp1, true, 32, 0, false); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -887,7 +967,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - int64_t result = vfpFpDToFixed(cOp1, true, false, 0, false); + int64_t result = vfpFpToFixed(cOp1, true, 32, 0, false); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -907,7 +987,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, 0); + FpDest_uw = vfpFpToFixed(FpOp1, false, 32, 0); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -927,7 +1007,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, false, false, 0); + uint64_t result = vfpFpToFixed(cOp1, false, 32, 0); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -947,7 +1027,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, 0); + FpDest_sw = vfpFpToFixed(FpOp1, true, 32, 0); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -967,7 +1047,7 @@ let {{ VfpSavedState state = prepFpState(fpscr.rMode); fesetround(FeRoundZero); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - int64_t result = vfpFpDToFixed(cOp1, true, false, 0); + int64_t result = vfpFpToFixed(cOp1, true, 
32, 0); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -1333,7 +1413,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sw = vfpFpSToFixed(FpOp1, true, false, imm); + FpDest_sw = vfpFpToFixed(FpOp1, true, 32, imm); __asm__ __volatile__("" :: "m" (FpDest_sw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1352,7 +1432,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, true, false, imm); + uint64_t mid = vfpFpToFixed(cOp1, true, 32, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1372,7 +1452,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uw = vfpFpSToFixed(FpOp1, false, false, imm); + FpDest_uw = vfpFpToFixed(FpOp1, false, 32, imm); __asm__ __volatile__("" :: "m" (FpDest_uw)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1391,7 +1471,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, false, false, imm); + uint64_t mid = vfpFpToFixed(cOp1, false, 32, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1410,7 +1490,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_sw) : "m" (FpOp1_sw)); - FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sw, false, imm); + FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sw, 32, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1428,7 +1508,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm); + double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, 32, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1447,7 +1527,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_uw) : "m" (FpOp1_uw)); - FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uw, false, imm); + FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uw, 32, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1465,7 +1545,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm); + double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, 32, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1485,7 +1565,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_sh = vfpFpSToFixed(FpOp1, true, true, imm); + FpDest_sh = vfpFpToFixed(FpOp1, true, 16, imm); __asm__ __volatile__("" :: "m" (FpDest_sh)); 
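The mechanical fp.isa rewrite above folds the single/double "half-width" flag pair (vfpFpSToFixed/vfpFpDToFixed) into one width-parameterized vfpFpToFixed, so 16- and 32-bit fixed-point targets share a path. Conceptually (assumed signature, not gem5's exact one; truncating round mode; width <= 32 as in these calls):

    #include <cstdint>

    template <typename F>
    int64_t fpToFixed(F val, bool isSigned, unsigned width, unsigned fracBits)
    {
        // Scale by 2^fracBits, then saturate into a 'width'-bit integer.
        double scaled = double(val) * double(1ULL << fracBits);
        int64_t maxV = isSigned ? (1LL << (width - 1)) - 1 : (1LL << width) - 1;
        int64_t minV = isSigned ? -(1LL << (width - 1)) : 0;
        if (scaled > double(maxV)) return maxV;
        if (scaled < double(minV)) return minV;
        return int64_t(scaled);    // truncate toward zero
    }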
finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1505,7 +1585,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t result = vfpFpDToFixed(cOp1, true, true, imm); + uint64_t result = vfpFpToFixed(cOp1, true, 16, imm); __asm__ __volatile__("" :: "m" (result)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = result; @@ -1526,7 +1606,7 @@ let {{ vfpFlushToZero(fpscr, FpOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1)); - FpDest_uh = vfpFpSToFixed(FpOp1, false, true, imm); + FpDest_uh = vfpFpToFixed(FpOp1, false, 16, imm); __asm__ __volatile__("" :: "m" (FpDest_uh)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1546,7 +1626,7 @@ let {{ vfpFlushToZero(fpscr, cOp1); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1)); - uint64_t mid = vfpFpDToFixed(cOp1, false, true, imm); + uint64_t mid = vfpFpToFixed(cOp1, false, 16, imm); __asm__ __volatile__("" :: "m" (mid)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = mid; @@ -1566,7 +1646,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_sh) : "m" (FpOp1_sh)); - FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sh, true, imm); + FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_sh, 16, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1585,7 +1665,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm); + double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, 16, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); @@ -1605,7 +1685,7 @@ let {{ FPSCR fpscr = (FPSCR) FpscrExc; VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (FpOp1_uh) : "m" (FpOp1_uh)); - FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uh, true, imm); + FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1_uh, 16, imm); __asm__ __volatile__("" :: "m" (FpDest)); finishVfp(fpscr, state, fpscr.fz); FpscrExc = fpscr; @@ -1624,7 +1704,7 @@ let {{ uint64_t mid = ((uint64_t)FpOp1P0_uw | ((uint64_t)FpOp1P1_uw << 32)); VfpSavedState state = prepFpState(fpscr.rMode); __asm__ __volatile__("" : "=m" (mid) : "m" (mid)); - double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm); + double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, 16, imm); __asm__ __volatile__("" :: "m" (cDest)); finishVfp(fpscr, state, fpscr.fz); FpDestP0_uw = dblLow(cDest); diff --git a/src/arch/arm/isa/insts/fp64.isa b/src/arch/arm/isa/insts/fp64.isa new file mode 100644 index 000000000..95dec5062 --- /dev/null +++ b/src/arch/arm/isa/insts/fp64.isa @@ -0,0 +1,811 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2012-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. 
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Thomas Grocutt +// Edmund Grimley Evans + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + fmovImmSCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = bits(imm, 31, 0); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovImmSIop = InstObjParams("fmov", "FmovImmS", "FpRegImmOp", + { "code": fmovImmSCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegImmOpDeclare.subst(fmovImmSIop); + decoder_output += FpRegImmOpConstructor.subst(fmovImmSIop); + exec_output += BasicExecute.subst(fmovImmSIop); + + fmovImmDCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = bits(imm, 31, 0); + AA64FpDestP1_uw = bits(imm, 63, 32); + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovImmDIop = InstObjParams("fmov", "FmovImmD", "FpRegImmOp", + { "code": fmovImmDCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegImmOpDeclare.subst(fmovImmDIop); + decoder_output += AA64FpRegImmOpConstructor.subst(fmovImmDIop); + exec_output += BasicExecute.subst(fmovImmDIop); + + fmovRegSCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = AA64FpOp1P0_uw; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovRegSIop = InstObjParams("fmov", "FmovRegS", "FpRegRegOp", + { "code": fmovRegSCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegSIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegSIop); + exec_output += BasicExecute.subst(fmovRegSIop); + + fmovRegDCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = AA64FpOp1P0_uw; + AA64FpDestP1_uw = AA64FpOp1P1_uw; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovRegDIop = InstObjParams("fmov", "FmovRegD", "FpRegRegOp", + { "code": fmovRegDCode, + 
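The FMOV forms this file defines (the immediate and register moves above, plus the core-register variants that follow) are pure bit-pattern copies; the 128-bit physical register is modeled as four 32-bit chunks, P0 through P3. A standalone sketch of that layout (assumed, illustrative):

    #include <cstdint>

    struct VReg { uint32_t p[4]; };     // P0..P3, low to high

    void fmovFromX(VReg &d, uint64_t x)         // FMOV Dn, Xm
    {
        d.p[0] = uint32_t(x);
        d.p[1] = uint32_t(x >> 32);
        d.p[2] = d.p[3] = 0;                    // writing D zeroes the top half
    }

    uint64_t fmovToX(const VReg &s)             // FMOV Xm, Dn
    {
        return (uint64_t(s.p[1]) << 32) | s.p[0];
    }

    void fmovToHighHalf(VReg &d, uint64_t x)    // FMOV Vn.D[1], Xm
    {
        d.p[2] = uint32_t(x);                   // top half only; P0/P1 kept
        d.p[3] = uint32_t(x >> 32);
    }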
"op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegDIop); + exec_output += BasicExecute.subst(fmovRegDIop); + + fmovCoreRegWCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = WOp1_uw; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovCoreRegWIop = InstObjParams("fmov", "FmovCoreRegW", "FpRegRegOp", + { "code": fmovCoreRegWCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovCoreRegWIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegWIop); + exec_output += BasicExecute.subst(fmovCoreRegWIop); + + fmovCoreRegXCode = vfp64EnabledCheckCode + ''' + AA64FpDestP0_uw = XOp1_ud; + AA64FpDestP1_uw = XOp1_ud >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + fmovCoreRegXIop = InstObjParams("fmov", "FmovCoreRegX", "FpRegRegOp", + { "code": fmovCoreRegXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovCoreRegXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovCoreRegXIop); + exec_output += BasicExecute.subst(fmovCoreRegXIop); + + fmovUCoreRegXCode = vfp64EnabledCheckCode + ''' + AA64FpDestP2_uw = XOp1_ud; + AA64FpDestP3_uw = XOp1_ud >> 32; + ''' + fmovUCoreRegXIop = InstObjParams("fmov", "FmovUCoreRegX", "FpRegRegOp", + { "code": fmovUCoreRegXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovUCoreRegXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovUCoreRegXIop); + exec_output += BasicExecute.subst(fmovUCoreRegXIop); + + fmovRegCoreWCode = vfp64EnabledCheckCode + ''' + WDest = AA64FpOp1P0_uw; + ''' + fmovRegCoreWIop = InstObjParams("fmov", "FmovRegCoreW", "FpRegRegOp", + { "code": fmovRegCoreWCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegCoreWIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreWIop); + exec_output += BasicExecute.subst(fmovRegCoreWIop); + + fmovRegCoreXCode = vfp64EnabledCheckCode + ''' + XDest = ( ((uint64_t) AA64FpOp1P1_uw) << 32) | AA64FpOp1P0_uw; + ''' + fmovRegCoreXIop = InstObjParams("fmov", "FmovRegCoreX", "FpRegRegOp", + { "code": fmovRegCoreXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovRegCoreXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovRegCoreXIop); + exec_output += BasicExecute.subst(fmovRegCoreXIop); + + fmovURegCoreXCode = vfp64EnabledCheckCode + ''' + XDest = ( ((uint64_t) AA64FpOp1P3_uw) << 32) | AA64FpOp1P2_uw; + ''' + fmovURegCoreXIop = InstObjParams("fmov", "FmovURegCoreX", "FpRegRegOp", + { "code": fmovURegCoreXCode, + "op_class": "SimdFloatMiscOp" }, []) + header_output += FpRegRegOpDeclare.subst(fmovURegCoreXIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fmovURegCoreXIop); + exec_output += BasicExecute.subst(fmovURegCoreXIop); +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + singleIntConvCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + singleIntConvCode2 = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cOp2 = AA64FpOp2P0_uw; + uint32_t cDest = %(op)s; + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + 
AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + singleBinOp = "binaryOp(fpscr, AA64FpOp1P0, AA64FpOp2P0," + \ + "%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)" + singleUnaryOp = "unaryOp(fpscr, AA64FpOp1P0, %(func)s, fpscr.fz, fpscr.rMode)" + + doubleIntConvCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw; + uint64_t cDest = %(op)s; + AA64FpDestP0_uw = cDest & 0xFFFFFFFF; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + doubleIntConvCode2 = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = ((uint64_t) AA64FpOp1P1_uw) << 32 | AA64FpOp1P0_uw; + uint64_t cOp2 = ((uint64_t) AA64FpOp2P1_uw) << 32 | AA64FpOp2P0_uw; + uint64_t cDest = %(op)s; + AA64FpDestP0_uw = cDest & 0xFFFFFFFF; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + doubleBinOp = ''' + binaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), + dbl(AA64FpOp2P0_uw, AA64FpOp2P1_uw), + %(func)s, fpscr.fz, fpscr.dn, fpscr.rMode); + ''' + doubleUnaryOp = ''' + unaryOp(fpscr, dbl(AA64FpOp1P0_uw, AA64FpOp1P1_uw), %(func)s, + fpscr.fz, fpscr.rMode) + ''' + + def buildTernaryFpOp(name, opClass, sOp, dOp): + global header_output, decoder_output, exec_output + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + ''' + if isDouble: + code += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + uint64_t cOp2 = AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32; + uint64_t cOp3 = AA64FpOp3P0_uw | (uint64_t)AA64FpOp3P1_uw << 32; + uint64_t cDest; + ''' "cDest = " + dOp + ";" + ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' + else: + code += ''' + uint32_t cOp1 = AA64FpOp1P0_uw; + uint32_t cOp2 = AA64FpOp2P0_uw; + uint32_t cOp3 = AA64FpOp3P0_uw; + uint32_t cDest; + ''' "cDest = " + sOp + ";" + ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + iop = InstObjParams(name.lower(), name + ("D" if isDouble else "S"), + "FpRegRegRegRegOp", + { "code": code, "op_class": opClass }, []) + + header_output += AA64FpRegRegRegRegOpDeclare.subst(iop) + decoder_output += AA64FpRegRegRegRegOpConstructor.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildTernaryFpOp("FMAdd", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(cOp3, cOp1, cOp2, fpscr)", + "fplibMulAdd<uint64_t>(cOp3, cOp1, cOp2, fpscr)" ) + buildTernaryFpOp("FMSub", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(cOp3, fplibNeg<uint32_t>(cOp1), cOp2, fpscr)", + "fplibMulAdd<uint64_t>(cOp3, fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" ) + buildTernaryFpOp("FNMAdd", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), fplibNeg<uint32_t>(cOp1), cOp2, fpscr)", + "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), fplibNeg<uint64_t>(cOp1), cOp2, fpscr)" ) + buildTernaryFpOp("FNMSub", "SimdFloatMultAccOp", + "fplibMulAdd<uint32_t>(fplibNeg<uint32_t>(cOp3), cOp1, cOp2, fpscr)", + "fplibMulAdd<uint64_t>(fplibNeg<uint64_t>(cOp3), cOp1, cOp2, fpscr)" ) + + def buildBinFpOp(name, Name, base, opClass, singleOp, doubleOp): + global header_output, decoder_output, exec_output + + code = singleIntConvCode2 % { "op": singleOp } + sIop = InstObjParams(name, Name + "S", base, + { "code": code, + "op_class": opClass }, []) + + code = doubleIntConvCode2 % { "op": doubleOp } + dIop = InstObjParams(name, Name + "D", base, + { "code": code, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + for iop in sIop, dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildBinFpOp("fadd", "FAdd", "FpRegRegRegOp", "SimdFloatAddOp", + "fplibAdd<uint32_t>(cOp1, cOp2, fpscr)", + "fplibAdd<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fsub", "FSub", "FpRegRegRegOp", "SimdFloatAddOp", + "fplibSub<uint32_t>(cOp1, cOp2, fpscr)", + "fplibSub<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fdiv", "FDiv", "FpRegRegRegOp", "SimdFloatDivOp", + "fplibDiv<uint32_t>(cOp1, cOp2, fpscr)", + "fplibDiv<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fmul", "FMul", "FpRegRegRegOp", "SimdFloatMultOp", + "fplibMul<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMul<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fnmul", "FNMul", "FpRegRegRegOp", "SimdFloatMultOp", + "fplibNeg<uint32_t>(fplibMul<uint32_t>(cOp1, cOp2, fpscr))", + "fplibNeg<uint64_t>(fplibMul<uint64_t>(cOp1, cOp2, fpscr))") + buildBinFpOp("fmin", "FMin", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMin<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMin<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fmax", "FMax", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMax<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMax<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fminnm", "FMinNM", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMinNum<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMinNum<uint64_t>(cOp1, cOp2, fpscr)") + buildBinFpOp("fmaxnm", "FMaxNM", "FpRegRegRegOp", "SimdFloatCmpOp", + "fplibMaxNum<uint32_t>(cOp1, cOp2, fpscr)", + "fplibMaxNum<uint64_t>(cOp1, cOp2, fpscr)") + + def buildUnaryFpOp(name, Name, base, opClass, singleOp, doubleOp = None): + if doubleOp is None: + doubleOp = singleOp + global header_output, decoder_output, exec_output + + code = singleIntConvCode % { "op": singleOp } + sIop = InstObjParams(name, Name + "S", base, + { "code": code, + "op_class": opClass }, []) + code = doubleIntConvCode % { "op": doubleOp } + dIop = InstObjParams(name, Name + "D", base, + { "code": code, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + for iop in sIop, dIop: + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildUnaryFpOp("fsqrt", "FSqrt", "FpRegRegOp", "SimdFloatSqrtOp", + "fplibSqrt<uint32_t>(cOp1, fpscr)", "fplibSqrt<uint64_t>(cOp1, fpscr)") + + def buildSimpleUnaryFpOp(name, Name, base, opClass, singleOp, + doubleOp = None, isIntConv = True): + if doubleOp is None: + doubleOp = singleOp + global header_output, decoder_output, exec_output + + if isIntConv: + sCode = singleIntConvCode + dCode = doubleIntConvCode + else: + sCode = singleCode + dCode = doubleCode + + for code, op, suffix in [[sCode, singleOp, "S"], + [dCode, doubleOp, "D"]]: + iop = InstObjParams(name, Name + suffix, base, + { "code": code % { "op": op }, + "op_class": opClass }, []) + + declareTempl = eval( base + "Declare"); + constructorTempl = eval("AA64" + base + "Constructor"); + + header_output += declareTempl.subst(iop) + decoder_output += constructorTempl.subst(iop) + exec_output += BasicExecute.subst(iop) + + buildSimpleUnaryFpOp("fneg", "FNeg", "FpRegRegOp", "SimdFloatMiscOp", + "fplibNeg<uint32_t>(cOp1)", "fplibNeg<uint64_t>(cOp1)") + buildSimpleUnaryFpOp("fabs", "FAbs", "FpRegRegOp", "SimdFloatMiscOp", + "fplibAbs<uint32_t>(cOp1)", "fplibAbs<uint64_t>(cOp1)") + buildSimpleUnaryFpOp("frintn", "FRIntN", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEEVEN, false, fpscr)") + buildSimpleUnaryFpOp("frintp", "FRIntP", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_POSINF, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_POSINF, false, fpscr)") + buildSimpleUnaryFpOp("frintm", "FRIntM", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_NEGINF, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_NEGINF, false, fpscr)") + buildSimpleUnaryFpOp("frintz", "FRIntZ", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_ZERO, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_ZERO, false, fpscr)") + buildSimpleUnaryFpOp("frinta", "FRIntA", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPRounding_TIEAWAY, false, fpscr)") + buildSimpleUnaryFpOp("frinti", "FRIntI", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), false, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), false, fpscr)") + buildSimpleUnaryFpOp("frintx", "FRIntX", "FpRegRegOp", "SimdFloatMiscOp", + "fplibRoundInt<uint32_t>(cOp1, FPCRRounding(fpscr), true, fpscr)", + "fplibRoundInt<uint64_t>(cOp1, FPCRRounding(fpscr), true, fpscr)") +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # Creates the integer to floating point instructions, including variants for + # signed/unsigned, float/double, etc. + for regL, regOpL, width in [["W", "w", 32], + ["X", "d", 64]]: + for isDouble in True, False: + for us, usCode in [["U", "uint%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)], + ["S", "int%d_t cSrc = %sOp1_u%s;" %(width, regL, regOpL)]]: + fcvtIntFpDCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s + ''' %(usCode) + + if isDouble: + fcvtIntFpDCode += ''' + uint64_t cDest = fplibFixedToFP<uint64_t>(cSrc, 0, + %s, FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' % ("true" if us == "U" else "false") + else: + fcvtIntFpDCode += ''' + uint32_t cDest = fplibFixedToFP<uint32_t>(cSrc, 0, + %s, FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' % ("true" if us == "U" else "false") + fcvtIntFpDCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "Fcvt%s%sIntFp%s" %(regL, us, "D" if isDouble else "S") + mnem = "%scvtf" %(us.lower()) + fcvtIntFpDIop = InstObjParams(mnem, instName, "FpRegRegOp", + { "code": fcvtIntFpDCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtIntFpDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtIntFpDIop); + exec_output += BasicExecute.subst(fcvtIntFpDIop); + + # Generates the floating point to integer conversion instructions in various + # variants, e.g. signed/unsigned + def buildFpCvtIntOp(isDouble, isSigned, isXReg): + global header_output, decoder_output, exec_output + + for rmode, roundingMode in [["N", "FPRounding_TIEEVEN"], + ["P", "FPRounding_POSINF"], + ["M", "FPRounding_NEGINF"], + ["Z", "FPRounding_ZERO"], + ["A", "FPRounding_TIEAWAY"]]: + fcvtFpIntCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc;''' + if isDouble: + fcvtFpIntCode += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + ''' + else: + fcvtFpIntCode += "uint32_t cOp1 = AA64FpOp1P0_uw;" + + fcvtFpIntCode += ''' + %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 0, %s, %s, fpscr); + FpscrExc = fpscr; + ''' %("X" if isXReg else "W", + "64" if isDouble else "32", + "64" if isXReg else "32", + "false" if isSigned else "true", + roundingMode) + + instName = "FcvtFp%sInt%s%s%s" %("S" if isSigned else "U", + "X" if isXReg else "W", + "D" if isDouble else "S", rmode) + mnem = "fcvt%s%s" %(rmode, "s" if isSigned else "u") + fcvtFpIntIop = InstObjParams(mnem, instName, "FpRegRegOp", + { "code": fcvtFpIntCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpIntIop); + decoder_output += FpRegRegOpConstructor.subst(fcvtFpIntIop); + exec_output += BasicExecute.subst(fcvtFpIntIop); + + # Now actually do the building with the different variants + for isDouble in True, False: + for isSigned in True, False: + for isXReg in True, False: + buildFpCvtIntOp(isDouble, isSigned, isXReg) + + fcvtFpSFpDCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cDest = fplibConvert<uint32_t, uint64_t>(AA64FpOp1P0_uw, + FPCRRounding(fpscr), fpscr); + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + fcvtFpSFpDIop = InstObjParams("fcvt", "FCvtFpSFpD", "FpRegRegOp", + { "code": fcvtFpSFpDCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpSFpDIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpSFpDIop); + exec_output += BasicExecute.subst(fcvtFpSFpDIop); + + fcvtFpDFpSCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + AA64FpDestP0_uw = fplibConvert<uint64_t, uint32_t>(cOp1, + FPCRRounding(fpscr), fpscr); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + fcvtFpDFpSIop = InstObjParams("fcvt", "FcvtFpDFpS", "FpRegRegOp", + {"code": fcvtFpDFpSCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpDFpSIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpDFpSIop); + exec_output += BasicExecute.subst(fcvtFpDFpSIop); + + # Half precision to single or double precision conversion + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s cDest = fplibConvert<uint16_t, uint%s_t>(AA64FpOp1P0_uw, + FPCRRounding(fpscr), fpscr); + ''' % ("uint64_t" if isDouble else "uint32_t", + "64" if isDouble else "32") + if isDouble: + code += ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = cDest >> 32; + ''' + else: + code += ''' + AA64FpDestP0_uw = cDest; + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "FcvtFpHFp%s" %("D" if isDouble else "S") + fcvtFpHFpIop = InstObjParams("fcvt", instName, "FpRegRegOp", + { "code": code, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpHFpIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpHFpIop); + exec_output += BasicExecute.subst(fcvtFpHFpIop); + + # Single or double precision to half precision conversion + for isDouble in True, False: + code = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s; + AA64FpDestP0_uw = fplibConvert<uint%s_t, uint16_t>(cOp1, + FPCRRounding(fpscr), fpscr); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' % ("uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "uint32_t cOp1 = AA64FpOp1P0_uw", + "64" if isDouble else "32") + + instName = "FcvtFp%sFpH" %("D" if isDouble else "S") + fcvtFpFpHIop = InstObjParams("fcvt", instName, "FpRegRegOp", + { "code": code, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegOpDeclare.subst(fcvtFpFpHIop); + decoder_output += AA64FpRegRegOpConstructor.subst(fcvtFpFpHIop); + exec_output += BasicExecute.subst(fcvtFpFpHIop); + + # Build the various versions of the floating point compare instructions + def buildFCmpOp(isQuiet, isDouble, isImm): + global header_output, decoder_output, exec_output + + fcmpCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s cOp1 = %s; + ''' % ("uint64_t" if isDouble else "uint32_t", + "AA64FpDestP0_uw | (uint64_t)AA64FpDestP1_uw << 32" + if isDouble else "AA64FpDestP0_uw") + if isImm: + fcmpCode += ''' + %s cOp2 = imm; + ''' % ("uint64_t" if isDouble else "uint32_t") + else: + fcmpCode += ''' + %s cOp2 = %s; + ''' % ("uint64_t" if isDouble else "uint32_t", + "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "AA64FpOp1P0_uw") + fcmpCode += ''' + int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr); + CondCodesNZ = cc >> 2 & 3; + CondCodesC = cc >> 1 & 1; + CondCodesV = cc & 1; + FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; + ''' % ("64" if isDouble else "32", "false" if isQuiet else "true") + + typeName = "Imm" if isImm else "Reg" + instName = "FCmp%s%s%s" %("" if isQuiet else "E", typeName, + "D" if isDouble else "S") + fcmpIop = InstObjParams("fcmp%s" %("" if isQuiet else "e"), instName, + "FpReg%sOp" %(typeName), + {"code": fcmpCode, + "op_class": "SimdFloatCmpOp"}, []) + + declareTemp = eval("FpReg%sOpDeclare" %(typeName)); + constructorTemp = eval("AA64FpReg%sOpConstructor" %(typeName)); + header_output += declareTemp.subst(fcmpIop); + decoder_output += constructorTemp.subst(fcmpIop); + exec_output += BasicExecute.subst(fcmpIop); + + for isQuiet in True, False: + for isDouble in True, False: + for isImm in True, False: + buildFCmpOp(isQuiet, isDouble, isImm) + + # Build the various versions of the conditional floating point compare + # instructions + def buildFCCmpOp(isQuiet, isDouble): + global header_output, decoder_output, exec_output + + fccmpCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + %s cOp1 = %s; + %s cOp2 = %s; + int cc = fplibCompare<uint%s_t>(cOp1, cOp2, %s, fpscr); + CondCodesNZ = cc >> 2 & 3; + CondCodesC = cc >> 1 & 1; + CondCodesV = cc & 1; + } else { + CondCodesNZ = (defCc >> 2) & 0x3; + CondCodesC = (defCc >> 1) & 0x1; + CondCodesV = defCc & 0x1; + } + FpCondCodes = fpscr & FpCondCodesMask; + FpscrExc = fpscr; + ''' % ("uint64_t" if isDouble else "uint32_t", + "AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32" + if isDouble else "AA64FpOp1P0_uw", + "uint64_t" if isDouble else "uint32_t", + "AA64FpOp2P0_uw | (uint64_t)AA64FpOp2P1_uw << 32" + if isDouble else "AA64FpOp2P0_uw", + "64" if isDouble else "32", "false" if isQuiet else "true") + + instName = "FCCmp%sReg%s" %("" if isQuiet else "E", + "D" if isDouble else "S") + fccmpIop = InstObjParams("fccmp%s" %("" if isQuiet else "e"), + instName, "FpCondCompRegOp", + {"code": fccmpCode, + "op_class": "SimdFloatCmpOp"}, []) + header_output += DataXCondCompRegDeclare.subst(fccmpIop); + decoder_output += DataXCondCompRegConstructor.subst(fccmpIop); + exec_output += BasicExecute.subst(fccmpIop); + + for isQuiet in True, False: + for isDouble in True, False: + buildFCCmpOp(isQuiet, isDouble) + +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + # Generates the variants of the floating to fixed point instructions + def buildFpCvtFixedOp(isSigned, isDouble, isXReg): + global header_output, decoder_output, exec_output + + fcvtFpFixedCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + ''' + if isDouble: + fcvtFpFixedCode += ''' + uint64_t cOp1 = AA64FpOp1P0_uw | (uint64_t)AA64FpOp1P1_uw << 32; + ''' + else: + fcvtFpFixedCode += "uint32_t cOp1 = AA64FpOp1P0_uw;" + fcvtFpFixedCode += ''' + %sDest = fplibFPToFixed<uint%s_t, uint%s_t>(cOp1, 64 - imm, %s, + FPRounding_ZERO, fpscr); + FpscrExc = fpscr; + ''' %("X" if isXReg else "W", + "64" if isDouble else "32", + "64" if isXReg else "32", + "false" if isSigned else "true") + + instName = "FcvtFp%sFixed%s%s" %("S" if isSigned else "U", + "D" if isDouble else "S", + "X" if isXReg else "W") + mnem = "fcvtz%s" %("s" if isSigned else "u") + fcvtFpFixedIop = InstObjParams(mnem, instName, "FpRegRegImmOp", + { "code": fcvtFpFixedCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegImmOpDeclare.subst(fcvtFpFixedIop); + decoder_output += AA64FpRegRegImmOpConstructor.subst(fcvtFpFixedIop); + exec_output += BasicExecute.subst(fcvtFpFixedIop); + + # Generates the variants of the fixed to floating point instructions + def buildFixedCvtFpOp(isSigned, isDouble, isXReg): + global header_output, decoder_output, exec_output + + srcRegType = "X" if isXReg else "W" + fcvtFixedFpCode = vfp64EnabledCheckCode + ''' + FPSCR fpscr = (FPSCR) FpscrExc; + %s result = fplibFixedToFP<uint%s_t>((%s%s_t)%sOp1, 64 - imm, + %s, FPCRRounding(fpscr), fpscr); + ''' %("uint64_t" if isDouble else "uint32_t", + "64" if isDouble else "32", + "int" if isSigned else "uint", "64" if isXReg else "32", + srcRegType, + "false" if isSigned else "true") + if isDouble: + fcvtFixedFpCode += ''' + AA64FpDestP0_uw = result; + AA64FpDestP1_uw = result >> 32; + ''' + else: + fcvtFixedFpCode += ''' + AA64FpDestP0_uw = result; + AA64FpDestP1_uw = 0; + ''' + fcvtFixedFpCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + FpscrExc = fpscr; + ''' + + instName = "Fcvt%sFixedFp%s%s" %("S" if isSigned else "U", + "D" if isDouble else "S", + srcRegType) + mnem = "%scvtf" %("s" if isSigned else "u") + fcvtFixedFpIop = InstObjParams(mnem, instName, "FpRegRegImmOp", + { "code": fcvtFixedFpCode, + "op_class": "SimdFloatCvtOp" }, []) + header_output += FpRegRegImmOpDeclare.subst(fcvtFixedFpIop); + decoder_output += FpRegRegImmOpConstructor.subst(fcvtFixedFpIop); + exec_output += BasicExecute.subst(fcvtFixedFpIop); + + # loop over the variants building the instructions for each + for isXReg in True, False: + for isDouble in True, False: + for isSigned in True, False: + buildFpCvtFixedOp(isSigned, isDouble, isXReg) + buildFixedCvtFpOp(isSigned, isDouble, isXReg) +}}; + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + for isDouble in True, False: + code = ''' + if (testPredicate(CondCodesNZ, CondCodesC, CondCodesV, condCode)) { + AA64FpDestP0_uw = AA64FpOp1P0_uw; + ''' + if isDouble: + code += ''' + AA64FpDestP1_uw = AA64FpOp1P1_uw; + } else { + AA64FpDestP0_uw = AA64FpOp2P0_uw; + AA64FpDestP1_uw = AA64FpOp2P1_uw; + } + ''' + else: + code += ''' + } else { + AA64FpDestP0_uw = AA64FpOp2P0_uw; + } + AA64FpDestP1_uw = 0; + ''' + code += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + + iop = InstObjParams("fcsel", "FCSel%s" %("D" if isDouble else "S"), + "FpCondSelOp", code) + header_output += DataXCondSelDeclare.subst(iop) + decoder_output += DataXCondSelConstructor.subst(iop) + exec_output += BasicExecute.subst(iop) +}}; diff --git a/src/arch/arm/isa/insts/insts.isa b/src/arch/arm/isa/insts/insts.isa index c01e87df8..9d90f7779 100644 --- a/src/arch/arm/isa/insts/insts.isa +++ b/src/arch/arm/isa/insts/insts.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved
// // The license below extends only to copyright in the software and shall @@ -37,6 +37,9 @@ // // Authors: Gabe Black +//AArch64 instructions +##include "aarch64.isa" + //Basic forms of various templates ##include "basic.isa" @@ -46,8 +49,15 @@ //Loads of a single item ##include "ldr.isa" +//Loads of a single item, AArch64 +##include "ldr64.isa" + //Miscellaneous instructions that don't fit elsewhere ##include "misc.isa" +##include "misc64.isa" + +//Stores of a single item, AArch64 +##include "str64.isa" //Stores of a single item ##include "str.isa" @@ -61,8 +71,12 @@ //Data processing instructions ##include "data.isa" +//AArch64 data processing instructions +##include "data64.isa" + //Branches ##include "branch.isa" +##include "branch64.isa" //Multiply ##include "mult.isa" @@ -72,9 +86,14 @@ //VFP ##include "fp.isa" +##include "fp64.isa" //Neon ##include "neon.isa" +//AArch64 Neon +##include "neon64.isa" +##include "neon64_mem.isa" + //m5 Psuedo-ops ##include "m5ops.isa" diff --git a/src/arch/arm/isa/insts/ldr.isa b/src/arch/arm/isa/insts/ldr.isa index f599fa4b9..6bfe40118 100644 --- a/src/arch/arm/isa/insts/ldr.isa +++ b/src/arch/arm/isa/insts/ldr.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -38,6 +38,7 @@ // Authors: Gabe Black let {{ + import math header_output = "" decoder_output = "" @@ -78,7 +79,8 @@ let {{ newDecoder, newExec) = self.fillTemplates(self.name, self.Name, codeBlobs, self.memFlags, instFlags, base, - wbDecl, pcDecl, self.rasPop) + wbDecl, pcDecl, self.rasPop, + self.size, self.sign) header_output += newHeader decoder_output += newDecoder @@ -160,7 +162,7 @@ let {{ self.size, self.sign, self.user) # Add memory request flags where necessary - self.memFlags.append("%d" % (self.size - 1)) + self.memFlags.append("%d" % int(math.log(self.size, 2))) if self.user: self.memFlags.append("ArmISA::TLB::UserMode") diff --git a/src/arch/arm/isa/insts/ldr64.isa b/src/arch/arm/isa/insts/ldr64.isa new file mode 100644 index 000000000..78460f661 --- /dev/null +++ b/src/arch/arm/isa/insts/ldr64.isa @@ -0,0 +1,446 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + + header_output = "" + decoder_output = "" + exec_output = "" + + class LoadInst64(LoadStoreInst): + execBase = 'Load64' + micro = False + + def __init__(self, mnem, Name, size=4, sign=False, user=False, + literal=False, flavor="normal", top=False): + super(LoadInst64, self).__init__() + + self.name = mnem + self.Name = Name + self.size = size + self.sign = sign + self.user = user + self.literal = literal + self.flavor = flavor + self.top = top + + self.memFlags = ["ArmISA::TLB::MustBeOne"] + self.instFlags = [] + self.codeBlobs = {"postacc_code" : ""} + + # Add memory request flags where necessary + if self.user: + self.memFlags.append("ArmISA::TLB::UserMode") + + if self.flavor == "dprefetch": + self.memFlags.append("Request::PREFETCH") + self.instFlags = ['IsDataPrefetch'] + elif self.flavor == "iprefetch": + self.memFlags.append("Request::PREFETCH") + self.instFlags = ['IsInstPrefetch'] + if self.micro: + self.instFlags.append("IsMicroop") + + if self.flavor in ("acexp", "exp"): + # For exclusive pair ops alignment check is based on total size + self.memFlags.append("%d" % int(math.log(self.size, 2) + 1)) + elif not (self.size == 16 and self.top): + # Only the first microop should perform alignment checking. + self.memFlags.append("%d" % int(math.log(self.size, 2))) + + if self.flavor not in ("acquire", "acex", "exclusive", + "acexp", "exp"): + self.memFlags.append("ArmISA::TLB::AllowUnaligned") + + if self.flavor in ("acquire", "acex", "acexp"): + self.instFlags.extend(["IsMemBarrier", + "IsWriteBarrier", + "IsReadBarrier"]) + if self.flavor in ("acex", "exclusive", "exp", "acexp"): + self.memFlags.append("Request::LLSC") + + def buildEACode(self): + # Address computation code + eaCode = "" + if self.flavor == "fp": + eaCode += vfp64EnabledCheckCode + + if self.literal: + eaCode += "EA = RawPC" + else: + eaCode += SPAlignmentCheckCode + "EA = XBase" + + if self.size == 16: + if self.top: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 
0 : 8)" + else: + eaCode += " + (isBigEndian64(xc->tcBase()) ? 8 : 0)" + if not self.post: + eaCode += self.offset + eaCode += ";" + + self.codeBlobs["ea_code"] = eaCode + + def emitHelper(self, base='Memory64', wbDecl=None): + global header_output, decoder_output, exec_output + + # If this is a microop itself, don't allow anything that would + # require further microcoding. + if self.micro: + assert not wbDecl + + fa_code = None + if not self.micro and self.flavor in ("normal", "widen", "acquire"): + fa_code = ''' + fault->annotate(ArmFault::SAS, %s); + fault->annotate(ArmFault::SSE, %s); + fault->annotate(ArmFault::SRT, dest); + fault->annotate(ArmFault::SF, %s); + fault->annotate(ArmFault::AR, %s); + ''' % ("0" if self.size == 1 else + "1" if self.size == 2 else + "2" if self.size == 4 else "3", + "true" if self.sign else "false", + "true" if (self.size == 8 or + self.flavor == "widen") else "false", + "true" if self.flavor == "acquire" else "false") + + (newHeader, newDecoder, newExec) = \ + self.fillTemplates(self.name, self.Name, self.codeBlobs, + self.memFlags, self.instFlags, + base, wbDecl, faCode=fa_code) + + header_output += newHeader + decoder_output += newDecoder + exec_output += newExec + + class LoadImmInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadImmInst64, self).__init__(*args, **kargs) + self.offset = " + imm" + + self.wbDecl = "MicroAddXiUop(machInst, base, base, imm);" + + class LoadRegInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadRegInst64, self).__init__(*args, **kargs) + self.offset = " + extendReg64(XOffset, type, shiftAmt, 64)" + + self.wbDecl = \ + "MicroAddXERegUop(machInst, base, base, " + \ + " offset, type, shiftAmt);" + + class LoadRawRegInst64(LoadInst64): + def __init__(self, *args, **kargs): + super(LoadRawRegInst64, self).__init__(*args, **kargs) + self.offset = "" + + class LoadSingle64(LoadInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor in ("dprefetch", "iprefetch"): + accCode = 'uint64_t temp M5_VAR_USED = Mem%s;' + elif self.flavor == "fp": + if self.size in (1, 2, 4): + accCode = ''' + AA64FpDestP0_uw = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + elif self.size == 8 or (self.size == 16 and not self.top): + accCode = ''' + uint64_t data = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP0_uw = (uint32_t)data; + AA64FpDestP1_uw = (data >> 32); + ''' + # Only zero out the other half if this isn't part of a + # pair of 8 byte loads implementing a 16 byte load. 
+ if self.size == 8: + accCode += ''' + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + ''' + elif self.size == 16 and self.top: + accCode = ''' + uint64_t data = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + AA64FpDestP2_uw = (uint32_t)data; + AA64FpDestP3_uw = (data >> 32); + ''' + elif self.flavor == "widen" or self.size == 8: + accCode = "XDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));" + else: + accCode = "WDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));" + if self.size == 16: + accCode = accCode % buildMemSuffix(self.sign, 8) + else: + accCode = accCode % buildMemSuffix(self.sign, self.size) + + self.codeBlobs["memacc_code"] = accCode + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class LoadDouble64(LoadInst64): + def emit(self): + self.buildEACode() + + # Code that actually handles the access + if self.flavor == "fp": + accCode = ''' + uint64_t data = cSwap(Mem_ud, isBigEndian64(xc->tcBase())); + AA64FpDestP0_uw = (uint32_t)data; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + AA64FpDest2P0_uw = (data >> 32); + AA64FpDest2P1_uw = 0; + AA64FpDest2P2_uw = 0; + AA64FpDest2P3_uw = 0; + ''' + else: + if self.sign: + if self.size == 4: + accCode = ''' + uint64_t data = cSwap(Mem_ud, + isBigEndian64(xc->tcBase())); + XDest = sext<32>((uint32_t)data); + XDest2 = sext<32>(data >> 32); + ''' + elif self.size == 8: + accCode = ''' + XDest = sext<64>(Mem_tud.a); + XDest2 = sext<64>(Mem_tud.b); + ''' + else: + if self.size == 4: + accCode = ''' + uint64_t data = cSwap(Mem_ud, + isBigEndian64(xc->tcBase())); + XDest = (uint32_t)data; + XDest2 = data >> 32; + ''' + elif self.size == 8: + accCode = ''' + XDest = Mem_tud.a; + XDest2 = Mem_tud.b; + ''' + self.codeBlobs["memacc_code"] = accCode + + # Push it out to the output files + wbDecl = None + if self.writeback and not self.micro: + wbDecl = self.wbDecl + self.emitHelper(self.base, wbDecl) + + class LoadImm64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryImm64' + writeback = False + post = False + + class LoadPre64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPreIndex64' + writeback = True + post = False + + class LoadPost64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreImm64' + base = 'ArmISA::MemoryPostIndex64' + writeback = True + post = True + + class LoadReg64(LoadRegInst64, LoadSingle64): + decConstBase = 'LoadStoreReg64' + base = 'ArmISA::MemoryReg64' + writeback = False + post = False + + class LoadRaw64(LoadRawRegInst64, LoadSingle64): + decConstBase = 'LoadStoreRaw64' + base = 'ArmISA::MemoryRaw64' + writeback = False + post = False + + class LoadEx64(LoadRawRegInst64, LoadSingle64): + decConstBase = 'LoadStoreEx64' + base = 'ArmISA::MemoryEx64' + writeback = False + post = False + + class LoadLit64(LoadImmInst64, LoadSingle64): + decConstBase = 'LoadStoreLit64' + base = 'ArmISA::MemoryLiteral64' + writeback = False + post = False + + def buildLoads64(mnem, NameBase, size, sign, flavor="normal"): + LoadImm64(mnem, NameBase + "_IMM", size, sign, flavor=flavor).emit() + LoadPre64(mnem, NameBase + "_PRE", size, sign, flavor=flavor).emit() + LoadPost64(mnem, NameBase + "_POST", size, sign, flavor=flavor).emit() + LoadReg64(mnem, NameBase + "_REG", size, sign, flavor=flavor).emit() + + buildLoads64("ldrb", "LDRB64", 1, False) + buildLoads64("ldrsb", "LDRSBW64", 1, True) + buildLoads64("ldrsb", "LDRSBX64", 1, 
True, flavor="widen") + buildLoads64("ldrh", "LDRH64", 2, False) + buildLoads64("ldrsh", "LDRSHW64", 2, True) + buildLoads64("ldrsh", "LDRSHX64", 2, True, flavor="widen") + buildLoads64("ldrsw", "LDRSW64", 4, True, flavor="widen") + buildLoads64("ldr", "LDRW64", 4, False) + buildLoads64("ldr", "LDRX64", 8, False) + buildLoads64("ldr", "LDRBFP64", 1, False, flavor="fp") + buildLoads64("ldr", "LDRHFP64", 2, False, flavor="fp") + buildLoads64("ldr", "LDRSFP64", 4, False, flavor="fp") + buildLoads64("ldr", "LDRDFP64", 8, False, flavor="fp") + + LoadImm64("prfm", "PRFM64_IMM", 8, flavor="dprefetch").emit() + LoadReg64("prfm", "PRFM64_REG", 8, flavor="dprefetch").emit() + LoadLit64("prfm", "PRFM64_LIT", 8, literal=True, flavor="dprefetch").emit() + LoadImm64("prfum", "PRFUM64_IMM", 8, flavor="dprefetch").emit() + + LoadImm64("ldurb", "LDURB64_IMM", 1, False).emit() + LoadImm64("ldursb", "LDURSBW64_IMM", 1, True).emit() + LoadImm64("ldursb", "LDURSBX64_IMM", 1, True, flavor="widen").emit() + LoadImm64("ldurh", "LDURH64_IMM", 2, False).emit() + LoadImm64("ldursh", "LDURSHW64_IMM", 2, True).emit() + LoadImm64("ldursh", "LDURSHX64_IMM", 2, True, flavor="widen").emit() + LoadImm64("ldursw", "LDURSW64_IMM", 4, True, flavor="widen").emit() + LoadImm64("ldur", "LDURW64_IMM", 4, False).emit() + LoadImm64("ldur", "LDURX64_IMM", 8, False).emit() + LoadImm64("ldur", "LDURBFP64_IMM", 1, flavor="fp").emit() + LoadImm64("ldur", "LDURHFP64_IMM", 2, flavor="fp").emit() + LoadImm64("ldur", "LDURSFP64_IMM", 4, flavor="fp").emit() + LoadImm64("ldur", "LDURDFP64_IMM", 8, flavor="fp").emit() + + LoadImm64("ldtrb", "LDTRB64_IMM", 1, False, True).emit() + LoadImm64("ldtrsb", "LDTRSBW64_IMM", 1, True, True).emit() + LoadImm64("ldtrsb", "LDTRSBX64_IMM", 1, True, True, flavor="widen").emit() + LoadImm64("ldtrh", "LDTRH64_IMM", 2, False, True).emit() + LoadImm64("ldtrsh", "LDTRSHW64_IMM", 2, True, True).emit() + LoadImm64("ldtrsh", "LDTRSHX64_IMM", 2, True, True, flavor="widen").emit() + LoadImm64("ldtrsw", "LDTRSW64_IMM", 4, True, flavor="widen").emit() + LoadImm64("ldtr", "LDTRW64_IMM", 4, False, True).emit() + LoadImm64("ldtr", "LDTRX64_IMM", 8, False, True).emit() + + LoadLit64("ldrsw", "LDRSWL64_LIT", 4, True, \ + literal=True, flavor="widen").emit() + LoadLit64("ldr", "LDRWL64_LIT", 4, False, literal=True).emit() + LoadLit64("ldr", "LDRXL64_LIT", 8, False, literal=True).emit() + LoadLit64("ldr", "LDRSFP64_LIT", 4, literal=True, flavor="fp").emit() + LoadLit64("ldr", "LDRDFP64_LIT", 8, literal=True, flavor="fp").emit() + + LoadRaw64("ldar", "LDARX64", 8, flavor="acquire").emit() + LoadRaw64("ldar", "LDARW64", 4, flavor="acquire").emit() + LoadRaw64("ldarh", "LDARH64", 2, flavor="acquire").emit() + LoadRaw64("ldarb", "LDARB64", 1, flavor="acquire").emit() + + LoadEx64("ldaxr", "LDAXRX64", 8, flavor="acex").emit() + LoadEx64("ldaxr", "LDAXRW64", 4, flavor="acex").emit() + LoadEx64("ldaxrh", "LDAXRH64", 2, flavor="acex").emit() + LoadEx64("ldaxrb", "LDAXRB64", 1, flavor="acex").emit() + + LoadEx64("ldxr", "LDXRX64", 8, flavor="exclusive").emit() + LoadEx64("ldxr", "LDXRW64", 4, flavor="exclusive").emit() + LoadEx64("ldxrh", "LDXRH64", 2, flavor="exclusive").emit() + LoadEx64("ldxrb", "LDXRB64", 1, flavor="exclusive").emit() + + class LoadImmU64(LoadImm64): + decConstBase = 'LoadStoreImmU64' + micro = True + + class LoadImmDU64(LoadImmInst64, LoadDouble64): + decConstBase = 'LoadStoreImmDU64' + base = 'ArmISA::MemoryDImm64' + micro = True + post = False + writeback = False + + class LoadImmDouble64(LoadImmInst64, 
LoadDouble64): + decConstBase = 'LoadStoreImmDU64' + base = 'ArmISA::MemoryDImm64' + micro = False + post = False + writeback = False + + class LoadRegU64(LoadReg64): + decConstBase = 'LoadStoreRegU64' + micro = True + + class LoadLitU64(LoadLit64): + decConstBase = 'LoadStoreLitU64' + micro = True + + LoadImmDouble64("ldaxp", "LDAXPW64", 4, flavor="acexp").emit() + LoadImmDouble64("ldaxp", "LDAXPX64", 8, flavor="acexp").emit() + LoadImmDouble64("ldxp", "LDXPW64", 4, flavor="exp").emit() + LoadImmDouble64("ldxp", "LDXPX64", 8, flavor="exp").emit() + + LoadImmU64("ldrxi_uop", "MicroLdrXImmUop", 8).emit() + LoadRegU64("ldrxr_uop", "MicroLdrXRegUop", 8).emit() + LoadLitU64("ldrxl_uop", "MicroLdrXLitUop", 8, literal=True).emit() + LoadImmU64("ldrfpxi_uop", "MicroLdrFpXImmUop", 8, flavor="fp").emit() + LoadRegU64("ldrfpxr_uop", "MicroLdrFpXRegUop", 8, flavor="fp").emit() + LoadLitU64("ldrfpxl_uop", "MicroLdrFpXLitUop", 8, literal=True, + flavor="fp").emit() + LoadImmU64("ldrqbfpxi_uop", "MicroLdrQBFpXImmUop", + 16, flavor="fp", top = False).emit() + LoadRegU64("ldrqbfpxr_uop", "MicroLdrQBFpXRegUop", + 16, flavor="fp", top = False).emit() + LoadLitU64("ldrqbfpxl_uop", "MicroLdrQBFpXLitUop", + 16, literal=True, flavor="fp", top = False).emit() + LoadImmU64("ldrqtfpxi_uop", "MicroLdrQTFpXImmUop", + 16, flavor="fp", top = True).emit() + LoadRegU64("ldrqtfpxr_uop", "MicroLdrQTFpXRegUop", + 16, flavor="fp", top = True).emit() + LoadLitU64("ldrqtfpxl_uop", "MicroLdrQTFpXLitUop", + 16, literal=True, flavor="fp", top = True).emit() + LoadImmDU64("ldrduxi_uop", "MicroLdrDUXImmUop", 4, sign=False).emit() + LoadImmDU64("ldrdsxi_uop", "MicroLdrDSXImmUop", 4, sign=True).emit() + LoadImmDU64("ldrdfpxi_uop", "MicroLdrDFpXImmUop", 4, flavor="fp").emit() +}}; diff --git a/src/arch/arm/isa/insts/m5ops.isa b/src/arch/arm/isa/insts/m5ops.isa index 06ed34af8..928d1be0d 100644 --- a/src/arch/arm/isa/insts/m5ops.isa +++ b/src/arch/arm/isa/insts/m5ops.isa @@ -1,5 +1,5 @@ // -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010, 2012-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -58,6 +58,7 @@ let {{ armCode = ''' PseudoInst::arm(xc->tcBase()); ''' + armIop = InstObjParams("arm", "Arm", "PredOp", { "code": armCode, "predicate_test": predicateTest }, @@ -69,6 +70,7 @@ let {{ quiesceCode = ''' PseudoInst::quiesce(xc->tcBase()); ''' + quiesceIop = InstObjParams("quiesce", "Quiesce", "PredOp", { "code": quiesceCode, "predicate_test": predicateTest }, @@ -81,6 +83,10 @@ let {{ PseudoInst::quiesceNs(xc->tcBase(), join32to64(R1, R0)); ''' + quiesceNsCode64 = ''' + PseudoInst::quiesceNs(xc->tcBase(), X0); + ''' + quiesceNsIop = InstObjParams("quiesceNs", "QuiesceNs", "PredOp", { "code": quiesceNsCode, "predicate_test": predicateTest }, @@ -89,10 +95,22 @@ let {{ decoder_output += BasicConstructor.subst(quiesceNsIop) exec_output += QuiescePredOpExecute.subst(quiesceNsIop) + quiesceNsIop = InstObjParams("quiesceNs", "QuiesceNs64", "PredOp", + { "code": quiesceNsCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsQuiesce"]) + header_output += BasicDeclare.subst(quiesceNsIop) + decoder_output += BasicConstructor.subst(quiesceNsIop) + exec_output += QuiescePredOpExecute.subst(quiesceNsIop) + quiesceCyclesCode = ''' PseudoInst::quiesceCycles(xc->tcBase(), join32to64(R1, R0)); ''' + quiesceCyclesCode64 = ''' + PseudoInst::quiesceCycles(xc->tcBase(), X0); + ''' + quiesceCyclesIop = InstObjParams("quiesceCycles", "QuiesceCycles", "PredOp", { 
"code": quiesceCyclesCode, "predicate_test": predicateTest }, @@ -101,12 +119,23 @@ let {{ decoder_output += BasicConstructor.subst(quiesceCyclesIop) exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop) + quiesceCyclesIop = InstObjParams("quiesceCycles", "QuiesceCycles64", "PredOp", + { "code": quiesceCyclesCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsQuiesce", "IsUnverifiable"]) + header_output += BasicDeclare.subst(quiesceCyclesIop) + decoder_output += BasicConstructor.subst(quiesceCyclesIop) + exec_output += QuiescePredOpExecute.subst(quiesceCyclesIop) + quiesceTimeCode = ''' uint64_t qt_val = PseudoInst::quiesceTime(xc->tcBase()); R0 = bits(qt_val, 31, 0); R1 = bits(qt_val, 63, 32); ''' + quiesceTimeCode64 = ''' + X0 = PseudoInst::quiesceTime(xc->tcBase()); + ''' quiesceTimeIop = InstObjParams("quiesceTime", "QuiesceTime", "PredOp", { "code": quiesceTimeCode, "predicate_test": predicateTest }, @@ -115,12 +144,23 @@ let {{ decoder_output += BasicConstructor.subst(quiesceTimeIop) exec_output += PredOpExecute.subst(quiesceTimeIop) + quiesceTimeIop = InstObjParams("quiesceTime", "QuiesceTime64", "PredOp", + { "code": quiesceTimeCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(quiesceTimeIop) + decoder_output += BasicConstructor.subst(quiesceTimeIop) + exec_output += PredOpExecute.subst(quiesceTimeIop) + rpnsCode = ''' uint64_t rpns_val = PseudoInst::rpns(xc->tcBase()); R0 = bits(rpns_val, 31, 0); R1 = bits(rpns_val, 63, 32); ''' + rpnsCode64 = ''' + X0 = PseudoInst::rpns(xc->tcBase()); + ''' rpnsIop = InstObjParams("rpns", "Rpns", "PredOp", { "code": rpnsCode, "predicate_test": predicateTest }, @@ -129,10 +169,22 @@ let {{ decoder_output += BasicConstructor.subst(rpnsIop) exec_output += PredOpExecute.subst(rpnsIop) + rpnsIop = InstObjParams("rpns", "Rpns64", "PredOp", + { "code": rpnsCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(rpnsIop) + decoder_output += BasicConstructor.subst(rpnsIop) + exec_output += PredOpExecute.subst(rpnsIop) + wakeCpuCode = ''' PseudoInst::wakeCPU(xc->tcBase(), join32to64(R1,R0)); ''' + wakeCpuCode64 = ''' + PseudoInst::wakeCPU(xc->tcBase(), X0); + ''' + wakeCPUIop = InstObjParams("wakeCPU", "WakeCPU", "PredOp", { "code": wakeCpuCode, "predicate_test": predicateTest }, @@ -141,6 +193,14 @@ let {{ decoder_output += BasicConstructor.subst(wakeCPUIop) exec_output += PredOpExecute.subst(wakeCPUIop) + wakeCPUIop = InstObjParams("wakeCPU", "WakeCPU64", "PredOp", + { "code": wakeCpuCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(wakeCPUIop) + decoder_output += BasicConstructor.subst(wakeCPUIop) + exec_output += PredOpExecute.subst(wakeCPUIop) + deprecated_ivlbIop = InstObjParams("deprecated_ivlb", "Deprecated_ivlb", "PredOp", { "code": '''warn_once("Obsolete M5 ivlb instruction encountered.\\n");''', "predicate_test": predicateTest }) @@ -171,6 +231,11 @@ let {{ m5exit_code = ''' PseudoInst::m5exit(xc->tcBase(), join32to64(R1, R0)); ''' + + m5exit_code64 = ''' + PseudoInst::m5exit(xc->tcBase(), X0); + ''' + m5exitIop = InstObjParams("m5exit", "M5exit", "PredOp", { "code": m5exit_code, "predicate_test": predicateTest }, @@ -190,6 +255,14 @@ let {{ decoder_output += BasicConstructor.subst(m5failIop) exec_output += PredOpExecute.subst(m5failIop) + m5exitIop = InstObjParams("m5exit", "M5exit64", "PredOp", + { 
"code": m5exit_code64, + "predicate_test": predicateTest }, + ["No_OpClass", "IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5exitIop) + decoder_output += BasicConstructor.subst(m5exitIop) + exec_output += PredOpExecute.subst(m5exitIop) + loadsymbolCode = ''' PseudoInst::loadsymbol(xc->tcBase()); ''' @@ -208,6 +281,10 @@ let {{ R1 = bits(ip_val, 63, 32); ''' + initparamCode64 = ''' + X0 = PseudoInst::initParam(xc->tcBase()); + ''' + initparamIop = InstObjParams("initparam", "Initparam", "PredOp", { "code": initparamCode, "predicate_test": predicateTest }, @@ -216,10 +293,21 @@ let {{ decoder_output += BasicConstructor.subst(initparamIop) exec_output += PredOpExecute.subst(initparamIop) + initparamIop = InstObjParams("initparam", "Initparam64", "PredOp", + { "code": initparamCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(initparamIop) + decoder_output += BasicConstructor.subst(initparamIop) + exec_output += PredOpExecute.subst(initparamIop) + resetstats_code = ''' PseudoInst::resetstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + resetstats_code64 = ''' + PseudoInst::resetstats(xc->tcBase(), X0, X1); + ''' resetstatsIop = InstObjParams("resetstats", "Resetstats", "PredOp", { "code": resetstats_code, "predicate_test": predicateTest }, @@ -228,9 +316,22 @@ let {{ decoder_output += BasicConstructor.subst(resetstatsIop) exec_output += PredOpExecute.subst(resetstatsIop) + resetstatsIop = InstObjParams("resetstats", "Resetstats64", "PredOp", + { "code": resetstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(resetstatsIop) + decoder_output += BasicConstructor.subst(resetstatsIop) + exec_output += PredOpExecute.subst(resetstatsIop) + dumpstats_code = ''' PseudoInst::dumpstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + dumpstats_code64 = ''' + PseudoInst::dumpstats(xc->tcBase(), X0, X1); + ''' + dumpstatsIop = InstObjParams("dumpstats", "Dumpstats", "PredOp", { "code": dumpstats_code, "predicate_test": predicateTest }, @@ -239,9 +340,22 @@ let {{ decoder_output += BasicConstructor.subst(dumpstatsIop) exec_output += PredOpExecute.subst(dumpstatsIop) + dumpstatsIop = InstObjParams("dumpstats", "Dumpstats64", "PredOp", + { "code": dumpstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(dumpstatsIop) + decoder_output += BasicConstructor.subst(dumpstatsIop) + exec_output += PredOpExecute.subst(dumpstatsIop) + dumpresetstats_code = ''' PseudoInst::dumpresetstats(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + dumpresetstats_code64 = ''' + PseudoInst::dumpresetstats(xc->tcBase(), X0, X1); + ''' + dumpresetstatsIop = InstObjParams("dumpresetstats", "Dumpresetstats", "PredOp", { "code": dumpresetstats_code, "predicate_test": predicateTest }, @@ -250,9 +364,22 @@ let {{ decoder_output += BasicConstructor.subst(dumpresetstatsIop) exec_output += PredOpExecute.subst(dumpresetstatsIop) + dumpresetstatsIop = InstObjParams("dumpresetstats", "Dumpresetstats64", "PredOp", + { "code": dumpresetstats_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(dumpresetstatsIop) + decoder_output += BasicConstructor.subst(dumpresetstatsIop) + exec_output += PredOpExecute.subst(dumpresetstatsIop) + m5checkpoint_code = ''' PseudoInst::m5checkpoint(xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2)); ''' + + m5checkpoint_code64 = 
''' + PseudoInst::m5checkpoint(xc->tcBase(), X0, X1); + ''' + m5checkpointIop = InstObjParams("m5checkpoint", "M5checkpoint", "PredOp", { "code": m5checkpoint_code, "predicate_test": predicateTest }, @@ -261,11 +388,27 @@ let {{ decoder_output += BasicConstructor.subst(m5checkpointIop) exec_output += PredOpExecute.subst(m5checkpointIop) + m5checkpointIop = InstObjParams("m5checkpoint", "M5checkpoint64", "PredOp", + { "code": m5checkpoint_code64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(m5checkpointIop) + decoder_output += BasicConstructor.subst(m5checkpointIop) + exec_output += PredOpExecute.subst(m5checkpointIop) + m5readfileCode = ''' int n = 4; uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); R0 = PseudoInst::readfile(xc->tcBase(), R0, join32to64(R3,R2), offset); ''' + + m5readfileCode64 = ''' + int n = 4; + uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); + n = 6; + X0 = PseudoInst::readfile(xc->tcBase(), (uint32_t)X0, X1, offset); + ''' + m5readfileIop = InstObjParams("m5readfile", "M5readfile", "PredOp", { "code": m5readfileCode, "predicate_test": predicateTest }, @@ -274,6 +417,14 @@ let {{ decoder_output += BasicConstructor.subst(m5readfileIop) exec_output += PredOpExecute.subst(m5readfileIop) + m5readfileIop = InstObjParams("m5readfile", "M5readfile64", "PredOp", + { "code": m5readfileCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsUnverifiable"]) + header_output += BasicDeclare.subst(m5readfileIop) + decoder_output += BasicConstructor.subst(m5readfileIop) + exec_output += PredOpExecute.subst(m5readfileIop) + m5writefileCode = ''' int n = 4; uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); @@ -282,6 +433,16 @@ let {{ R0 = PseudoInst::writefile(xc->tcBase(), R0, join32to64(R3,R2), offset, filenameAddr); ''' + + m5writefileCode64 = ''' + int n = 4; + uint64_t offset = getArgument(xc->tcBase(), n, sizeof(uint64_t), false); + n = 6; + Addr filenameAddr = getArgument(xc->tcBase(), n, sizeof(Addr), false); + X0 = PseudoInst::writefile(xc->tcBase(), (uint32_t)X0, X1, offset, + filenameAddr); + ''' + m5writefileIop = InstObjParams("m5writefile", "M5writefile", "PredOp", { "code": m5writefileCode, "predicate_test": predicateTest }, @@ -290,6 +451,14 @@ let {{ decoder_output += BasicConstructor.subst(m5writefileIop) exec_output += PredOpExecute.subst(m5writefileIop) + m5writefileIop = InstObjParams("m5writefile", "M5writefile64", "PredOp", + { "code": m5writefileCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5writefileIop) + decoder_output += BasicConstructor.subst(m5writefileIop) + exec_output += PredOpExecute.subst(m5writefileIop) + m5breakIop = InstObjParams("m5break", "M5break", "PredOp", { "code": "PseudoInst::debugbreak(xc->tcBase());", "predicate_test": predicateTest }, @@ -309,6 +478,9 @@ let {{ m5addsymbolCode = ''' PseudoInst::addsymbol(xc->tcBase(), join32to64(R1, R0), R2); ''' + m5addsymbolCode64 = ''' + PseudoInst::addsymbol(xc->tcBase(), X0, (uint32_t)X1); + ''' m5addsymbolIop = InstObjParams("m5addsymbol", "M5addsymbol", "PredOp", { "code": m5addsymbolCode, "predicate_test": predicateTest }, @@ -317,8 +489,17 @@ let {{ decoder_output += BasicConstructor.subst(m5addsymbolIop) exec_output += PredOpExecute.subst(m5addsymbolIop) + m5addsymbolIop = InstObjParams("m5addsymbol", "M5addsymbol64", "PredOp", + { "code": m5addsymbolCode64, + "predicate_test": 
predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5addsymbolIop) + decoder_output += BasicConstructor.subst(m5addsymbolIop) + exec_output += PredOpExecute.subst(m5addsymbolIop) + m5panicCode = '''panic("M5 panic instruction called at pc=%#x.", xc->pcState().pc());''' + m5panicIop = InstObjParams("m5panic", "M5panic", "PredOp", { "code": m5panicCode, "predicate_test": predicateTest }, @@ -332,6 +513,13 @@ let {{ join32to64(R1, R0), join32to64(R3, R2) );''' + + m5workbeginCode64 = '''PseudoInst::workbegin( + xc->tcBase(), + X0, + X1 + );''' + m5workbeginIop = InstObjParams("m5workbegin", "M5workbegin", "PredOp", { "code": m5workbeginCode, "predicate_test": predicateTest }, @@ -340,11 +528,26 @@ let {{ decoder_output += BasicConstructor.subst(m5workbeginIop) exec_output += PredOpExecute.subst(m5workbeginIop) + m5workbeginIop = InstObjParams("m5workbegin", "M5workbegin64", "PredOp", + { "code": m5workbeginCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5workbeginIop) + decoder_output += BasicConstructor.subst(m5workbeginIop) + exec_output += PredOpExecute.subst(m5workbeginIop) + m5workendCode = '''PseudoInst::workend( xc->tcBase(), join32to64(R1, R0), join32to64(R3, R2) );''' + + m5workendCode64 = '''PseudoInst::workend( + xc->tcBase(), + X0, + X1 + );''' + m5workendIop = InstObjParams("m5workend", "M5workend", "PredOp", { "code": m5workendCode, "predicate_test": predicateTest }, @@ -353,4 +556,11 @@ let {{ decoder_output += BasicConstructor.subst(m5workendIop) exec_output += PredOpExecute.subst(m5workendIop) + m5workendIop = InstObjParams("m5workend", "M5workend64", "PredOp", + { "code": m5workendCode64, + "predicate_test": predicateTest }, + ["IsNonSpeculative"]) + header_output += BasicDeclare.subst(m5workendIop) + decoder_output += BasicConstructor.subst(m5workendIop) + exec_output += PredOpExecute.subst(m5workendIop) }}; diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa index db36a3fff..f164595dd 100644 --- a/src/arch/arm/isa/insts/macromem.isa +++ b/src/arch/arm/isa/insts/macromem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -91,7 +91,8 @@ let {{ SCTLR sctlr = Sctlr; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Spsr, 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -158,8 +159,8 @@ let {{ header_output = decoder_output = exec_output = '' - loadIops = (microLdrUopIop, microLdrRetUopIop, - microLdrFpUopIop, microLdrDBFpUopIop, microLdrDTFpUopIop) + loadIops = (microLdrUopIop, microLdrRetUopIop, microLdrFpUopIop, + microLdrDBFpUopIop, microLdrDTFpUopIop) storeIops = (microStrUopIop, microStrFpUopIop, microStrDBFpUopIop, microStrDTFpUopIop) for iop in loadIops + storeIops: @@ -178,7 +179,7 @@ let {{ let {{ exec_output = header_output = '' - eaCode = 'EA = URa + imm;' + eaCode = 'EA = XURa + imm;' for size in (1, 2, 3, 4, 6, 8, 12, 16): # Set up the memory access. 
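Editor's aside between hunks, not part of the original patch: the X-register micro-ops added in the next hunk are the helpers that the AArch64 loads and stores attach through their wbDecl strings (see LoadImmInst64 in ldr64.isa above). A minimal sketch of the resulting expansion, using only names that appear in this patch; the exact microop ordering is a simplifying assumption for illustration:

    # Post-indexed LDR X1, [X0], #8 becomes a two-microop macroop, roughly:
    #   access microop:    X1 = Mem[X0]       (post=True, so EA takes no offset)
    #   writeback microop: MicroAddXiUop(machInst, base, base, imm)
    #                      i.e. X0 = X0 + 8   (its code is 'XURa = XURb + imm;')
    # Pre-indexed forms instead fold the offset into the EA computation and
    # then reuse the same add microop for the base writeback.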
@@ -592,6 +593,26 @@ let {{ URa = URb + shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC); ''' + microAddXiUopIop = InstObjParams('addxi_uop', 'MicroAddXiUop', + 'MicroIntImmXOp', + 'XURa = XURb + imm;', + ['IsMicroop']) + + microAddXiSpAlignUopIop = InstObjParams('addxi_uop', 'MicroAddXiSpAlignUop', + 'MicroIntImmXOp', ''' + if (isSP((IntRegIndex) urb) && bits(XURb, 3, 0) && + SPAlignmentCheckEnabled(xc->tcBase())) { + return new SPAlignmentFault(); + } + XURa = XURb + imm; + ''', ['IsMicroop']) + + microAddXERegUopIop = InstObjParams('addxr_uop', 'MicroAddXERegUop', + 'MicroIntRegXOp', + 'XURa = XURb + ' + \ + 'extendReg64(XURc, type, shiftAmt, 64);', + ['IsMicroop']) + microAddUopIop = InstObjParams('add_uop', 'MicroAddUop', 'MicroIntRegOp', {'code': microAddUopCode, @@ -604,6 +625,11 @@ let {{ 'predicate_test': predicateTest}, ['IsMicroop']) + microSubXiUopIop = InstObjParams('subxi_uop', 'MicroSubXiUop', + 'MicroIntImmXOp', + 'XURa = XURb - imm;', + ['IsMicroop']) + microSubUopCode = ''' URa = URb - shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC); ''' @@ -631,8 +657,8 @@ let {{ SCTLR sctlr = Sctlr; pNPC = URa; CPSR new_cpsr = - cpsrWriteByInstr(cpsrOrCondCodes, URb, - 0xF, true, sctlr.nmfi); + cpsrWriteByInstr(cpsrOrCondCodes, URb, Scr, Nsacr, + 0xF, true, sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; NextThumb = new_cpsr.t; NextJazelle = new_cpsr.j; @@ -651,25 +677,37 @@ let {{ ['IsMicroop']) header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \ + MicroIntImmDeclare.subst(microAddXiUopIop) + \ + MicroIntImmDeclare.subst(microAddXiSpAlignUopIop) + \ MicroIntImmDeclare.subst(microSubiUopIop) + \ + MicroIntImmDeclare.subst(microSubXiUopIop) + \ MicroIntRegDeclare.subst(microAddUopIop) + \ MicroIntRegDeclare.subst(microSubUopIop) + \ + MicroIntXERegDeclare.subst(microAddXERegUopIop) + \ MicroIntMovDeclare.subst(microUopRegMovIop) + \ MicroIntMovDeclare.subst(microUopRegMovRetIop) + \ MicroSetPCCPSRDeclare.subst(microUopSetPCCPSRIop) decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \ + MicroIntImmXConstructor.subst(microAddXiUopIop) + \ + MicroIntImmXConstructor.subst(microAddXiSpAlignUopIop) + \ MicroIntImmConstructor.subst(microSubiUopIop) + \ + MicroIntImmXConstructor.subst(microSubXiUopIop) + \ MicroIntRegConstructor.subst(microAddUopIop) + \ MicroIntRegConstructor.subst(microSubUopIop) + \ + MicroIntXERegConstructor.subst(microAddXERegUopIop) + \ MicroIntMovConstructor.subst(microUopRegMovIop) + \ MicroIntMovConstructor.subst(microUopRegMovRetIop) + \ MicroSetPCCPSRConstructor.subst(microUopSetPCCPSRIop) exec_output = PredOpExecute.subst(microAddiUopIop) + \ + BasicExecute.subst(microAddXiUopIop) + \ + BasicExecute.subst(microAddXiSpAlignUopIop) + \ PredOpExecute.subst(microSubiUopIop) + \ + BasicExecute.subst(microSubXiUopIop) + \ PredOpExecute.subst(microAddUopIop) + \ PredOpExecute.subst(microSubUopIop) + \ + BasicExecute.subst(microAddXERegUopIop) + \ PredOpExecute.subst(microUopRegMovIop) + \ PredOpExecute.subst(microUopRegMovRetIop) + \ PredOpExecute.subst(microUopSetPCCPSRIop) @@ -681,6 +719,25 @@ let {{ header_output = MacroMemDeclare.subst(iop) decoder_output = MacroMemConstructor.subst(iop) + iop = InstObjParams("ldpstp", "LdpStp", 'PairMemOp', "", []) + header_output += PairMemDeclare.subst(iop) + decoder_output += PairMemConstructor.subst(iop) + + iopImm = InstObjParams("bigfpmemimm", "BigFpMemImm", "BigFpMemImmOp", "") + iopPre = InstObjParams("bigfpmempre", "BigFpMemPre", "BigFpMemPreOp", "") + iopPost = 
InstObjParams("bigfpmempost", "BigFpMemPost", "BigFpMemPostOp", "") + for iop in (iopImm, iopPre, iopPost): + header_output += BigFpMemImmDeclare.subst(iop) + decoder_output += BigFpMemImmConstructor.subst(iop) + + iop = InstObjParams("bigfpmemreg", "BigFpMemReg", "BigFpMemRegOp", "") + header_output += BigFpMemRegDeclare.subst(iop) + decoder_output += BigFpMemRegConstructor.subst(iop) + + iop = InstObjParams("bigfpmemlit", "BigFpMemLit", "BigFpMemLitOp", "") + header_output += BigFpMemLitDeclare.subst(iop) + decoder_output += BigFpMemLitConstructor.subst(iop) + iop = InstObjParams("vldmult", "VldMult", 'VldMultOp', "", []) header_output += VMemMultDeclare.subst(iop) decoder_output += VMemMultConstructor.subst(iop) diff --git a/src/arch/arm/isa/insts/mem.isa b/src/arch/arm/isa/insts/mem.isa index c39f1b14f..aed6bab0d 100644 --- a/src/arch/arm/isa/insts/mem.isa +++ b/src/arch/arm/isa/insts/mem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2012 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -48,8 +48,8 @@ let {{ self.constructTemplate = eval(self.decConstBase + 'Constructor') def fillTemplates(self, name, Name, codeBlobs, memFlags, instFlags, - base = 'Memory', wbDecl = None, pcDecl = None, - rasPop = False): + base='Memory', wbDecl=None, pcDecl=None, + rasPop=False, size=4, sign=False, faCode=None): # Make sure flags are in lists (convert to lists if not). memFlags = makeList(memFlags) instFlags = makeList(instFlags) @@ -63,6 +63,22 @@ let {{ codeBlobs["ea_code"] = eaCode + if faCode: + # For AArch64 the fa_code snippet comes already assembled here + codeBlobs["fa_code"] = faCode + elif wbDecl == None: + codeBlobs["fa_code"] = ''' + if (dest != INTREG_PC) { + fault->annotate(ArmFault::SAS, %s); + fault->annotate(ArmFault::SSE, %s); + fault->annotate(ArmFault::SRT, dest); + } + ''' %("0" if size == 1 else + "1" if size == 2 else "2", + "true" if sign else "false") + else: + codeBlobs["fa_code"] = '' + macroName = Name instFlagsCopy = list(instFlags) codeBlobsCopy = dict(codeBlobs) @@ -108,6 +124,7 @@ let {{ "use_uops" : use_uops, "use_pc" : use_pc, "use_wb" : use_wb, + "fa_code" : '', "is_ras_pop" : is_ras_pop }, ['IsMacroop']) header_output += self.declareTemplate.subst(iop) @@ -176,8 +193,13 @@ let {{ return Name def buildMemSuffix(sign, size): - if size == 4: - memSuffix = '' + if size == 8: + memSuffix = '_ud' + elif size == 4: + if sign: + memSuffix = '_sw' + else: + memSuffix = '_uw' elif size == 2: if sign: memSuffix = '_sh' diff --git a/src/arch/arm/isa/insts/misc.isa b/src/arch/arm/isa/insts/misc.isa index b8425a240..678a125fb 100644 --- a/src/arch/arm/isa/insts/misc.isa +++ b/src/arch/arm/isa/insts/misc.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2012 ARM Limited +// Copyright (c) 2010-2013 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -40,21 +40,102 @@ let {{ svcCode = ''' - if (FullSystem) { - fault = new SupervisorCall; - } else { - fault = new SupervisorCall(machInst); - } + fault = new SupervisorCall(machInst, imm); ''' - svcIop = InstObjParams("svc", "Svc", "PredOp", + svcIop = InstObjParams("svc", "Svc", "ImmOp", { "code": svcCode, "predicate_test": predicateTest }, ["IsSyscall", "IsNonSpeculative", "IsSerializeAfter"]) - header_output = BasicDeclare.subst(svcIop) - decoder_output = BasicConstructor.subst(svcIop) + header_output = ImmOpDeclare.subst(svcIop) + 
decoder_output = ImmOpConstructor.subst(svcIop) exec_output = PredOpExecute.subst(svcIop) + smcCode = ''' + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr; + + if ((cpsr.mode != MODE_USER) && FullSystem) { + if (ArmSystem::haveVirtualization(xc->tcBase()) && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP) && hcr.tsc) { + fault = new HypervisorTrap(machInst, 0, EC_SMC_TO_HYP); + } else { + if (scr.scd) { + fault = disabledFault(); + } else { + fault = new SecureMonitorCall(machInst); + } + } + } else { + fault = disabledFault(); + } + ''' + + smcIop = InstObjParams("smc", "Smc", "PredOp", + { "code": smcCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(smcIop) + decoder_output += BasicConstructor.subst(smcIop) + exec_output += PredOpExecute.subst(smcIop) + + hvcCode = ''' + CPSR cpsr = Cpsr; + SCR scr = Scr; + + // Filter out the various cases where this instruction isn't defined + if (!FullSystem || !ArmSystem::haveVirtualization(xc->tcBase()) || + (cpsr.mode == MODE_USER) || + (ArmSystem::haveSecurity(xc->tcBase()) && (!scr.ns || !scr.hce))) { + fault = disabledFault(); + } else { + fault = new HypervisorCall(machInst, imm); + } + ''' + + hvcIop = InstObjParams("hvc", "Hvc", "ImmOp", + { "code": hvcCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += ImmOpDeclare.subst(hvcIop) + decoder_output += ImmOpConstructor.subst(hvcIop) + exec_output += PredOpExecute.subst(hvcIop) + + eretCode = ''' + SCTLR sctlr = Sctlr; + CPSR old_cpsr = Cpsr; + old_cpsr.nz = CondCodesNZ; + old_cpsr.c = CondCodesC; + old_cpsr.v = CondCodesV; + old_cpsr.ge = CondCodesGE; + + CPSR new_cpsr = cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, + true, sctlr.nmfi, xc->tcBase()); + Cpsr = ~CondCodesMask & new_cpsr; + CondCodesNZ = new_cpsr.nz; + CondCodesC = new_cpsr.c; + CondCodesV = new_cpsr.v; + CondCodesGE = new_cpsr.ge; + + NextThumb = (new_cpsr).t; + NextJazelle = (new_cpsr).j; + NextItState = (((new_cpsr).it2 << 2) & 0xFC) + | ((new_cpsr).it1 & 0x3); + + NPC = (old_cpsr.mode == MODE_HYP) ? 
ElrHyp : LR; + ''' + + eretIop = InstObjParams("eret", "Eret", "PredOp", + { "code": eretCode, + "predicate_test": predicateTest }, + ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(eretIop) + decoder_output += BasicConstructor.subst(eretIop) + exec_output += PredOpExecute.subst(eretIop) + + + }}; let {{ @@ -87,6 +168,59 @@ let {{ decoder_output += MrsConstructor.subst(mrsSpsrIop) exec_output += PredOpExecute.subst(mrsSpsrIop) + mrsBankedRegCode = ''' + bool isIntReg; + int regIdx; + + if (decodeMrsMsrBankedReg(byteMask, r, isIntReg, regIdx, Cpsr, Scr, Nsacr)) { + if (isIntReg) { + Dest = DecodedBankedIntReg; + } else { + Dest = xc->readMiscReg(regIdx); + } + } else { + return new UndefinedInstruction(machInst, false, mnemonic); + } + ''' + mrsBankedRegIop = InstObjParams("mrs", "MrsBankedReg", "MrsOp", + { "code": mrsBankedRegCode, + "predicate_test": predicateTest }, + ["IsSerializeBefore"]) + header_output += MrsBankedRegDeclare.subst(mrsBankedRegIop) + decoder_output += MrsBankedRegConstructor.subst(mrsBankedRegIop) + exec_output += PredOpExecute.subst(mrsBankedRegIop) + + msrBankedRegCode = ''' + bool isIntReg; + int regIdx; + + if (decodeMrsMsrBankedReg(byteMask, r, isIntReg, regIdx, Cpsr, Scr, Nsacr)) { + if (isIntReg) { + // This is a bit nasty: you would expect DecodedBankedIntReg + // not to be written unless the conditions on the if statements + // above are met, but if you look at the generated C code you'll + // find that it is written regardless. This is safe, however, as + // the operand lookup for DecodedBankedIntReg (see operands.isa) + // returns INTREG_DUMMY when it is not a valid integer register, + // redirecting the write to a register we don't care about.
+ DecodedBankedIntReg = Op1; + } else { + xc->setMiscReg(regIdx, Op1); + } + } else { + return new UndefinedInstruction(machInst, false, mnemonic); + } + ''' + msrBankedRegIop = InstObjParams("msr", "MsrBankedReg", "MsrRegOp", + { "code": msrBankedRegCode, + "predicate_test": predicateTest }, + ["IsSerializeAfter"]) + header_output += MsrBankedRegDeclare.subst(msrBankedRegIop) + decoder_output += MsrBankedRegConstructor.subst(msrBankedRegIop) + exec_output += PredOpExecute.subst(msrBankedRegIop) + msrCpsrRegCode = ''' SCTLR sctlr = Sctlr; CPSR old_cpsr = Cpsr; @@ -96,7 +230,8 @@ let {{ old_cpsr.ge = CondCodesGE; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, Op1, byteMask, false, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, Op1, Scr, Nsacr, byteMask, false, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -128,7 +263,8 @@ let {{ old_cpsr.v = CondCodesV; old_cpsr.ge = CondCodesGE; CPSR new_cpsr = - cpsrWriteByInstr(old_cpsr, imm, byteMask, false, sctlr.nmfi); + cpsrWriteByInstr(old_cpsr, imm, Scr, Nsacr, byteMask, false, + sctlr.nmfi, xc->tcBase()); Cpsr = ~CondCodesMask & new_cpsr; CondCodesNZ = new_cpsr.nz; CondCodesC = new_cpsr.c; @@ -488,12 +624,10 @@ let {{ decoder_output += BasicConstructor.subst(bkptIop) exec_output += BasicExecute.subst(bkptIop) - nopIop = InstObjParams("nop", "NopInst", "PredOp", \ - { "code" : "", "predicate_test" : predicateTest }, - ['IsNop']) + nopIop = InstObjParams("nop", "NopInst", "ArmStaticInst", "", ['IsNop']) header_output += BasicDeclare.subst(nopIop) - decoder_output += BasicConstructor.subst(nopIop) - exec_output += PredOpExecute.subst(nopIop) + decoder_output += BasicConstructor64.subst(nopIop) + exec_output += BasicExecute.subst(nopIop) yieldIop = InstObjParams("yield", "YieldInst", "PredOp", \ { "code" : "", "predicate_test" : predicateTest }) @@ -502,14 +636,31 @@ let {{ exec_output += PredOpExecute.subst(yieldIop) wfeCode = ''' - // WFE Sleeps if SevMailbox==0 and no unmasked interrupts are pending + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr64; + SCTLR sctlr = Sctlr; + + // WFE sleeps if SevMailbox==0 and no unmasked interrupts are pending + ThreadContext *tc = xc->tcBase(); if (SevMailbox == 1) { SevMailbox = 0; - PseudoInst::quiesceSkip(xc->tcBase()); - } else if (xc->tcBase()->getCpuPtr()->getInterruptController()->checkInterrupts(xc->tcBase())) { - PseudoInst::quiesceSkip(xc->tcBase()); + PseudoInst::quiesceSkip(tc); + } else if (tc->getCpuPtr()->getInterruptController()->checkInterrupts(tc)) { + PseudoInst::quiesceSkip(tc); + } else if (cpsr.el == EL0 && !sctlr.ntwe) { + PseudoInst::quiesceSkip(tc); + fault = new SupervisorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveVirtualization(tc) && + !inSecureState(scr, cpsr) && (cpsr.mode != MODE_HYP) && + hcr.twe) { + PseudoInst::quiesceSkip(tc); + fault = new HypervisorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveSecurity(tc) && cpsr.el != EL3 && scr.twe) { + PseudoInst::quiesceSkip(tc); + fault = new SecureMonitorTrap(machInst, 0x1E00001, EC_TRAPPED_WFI_WFE); } else { - PseudoInst::quiesce(xc->tcBase()); + PseudoInst::quiesce(tc); } ''' wfePredFixUpCode = ''' @@ -528,12 +679,30 @@ let {{ exec_output += QuiescePredOpExecuteWithFixup.subst(wfeIop) wfiCode = ''' + HCR hcr = Hcr; + CPSR cpsr = Cpsr; + SCR scr = Scr64; + SCTLR sctlr = Sctlr; + // WFI doesn't sleep if interrupts are pending (masked or not) - if
(xc->tcBase()->getCpuPtr()->getInterruptController()->checkRaw()) { - PseudoInst::quiesceSkip(xc->tcBase()); + ThreadContext *tc = xc->tcBase(); + if (tc->getCpuPtr()->getInterruptController()->checkWfiWake(hcr, cpsr, + scr)) { + PseudoInst::quiesceSkip(tc); + } else if (cpsr.el == EL0 && !sctlr.ntwi) { + PseudoInst::quiesceSkip(tc); + fault = new SupervisorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveVirtualization(tc) && hcr.twi && + (cpsr.mode != MODE_HYP) && !inSecureState(scr, cpsr)) { + PseudoInst::quiesceSkip(tc); + fault = new HypervisorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); + } else if (ArmSystem::haveSecurity(tc) && cpsr.el != EL3 && scr.twi) { + PseudoInst::quiesceSkip(tc); + fault = new SecureMonitorTrap(machInst, 0x1E00000, EC_TRAPPED_WFI_WFE); } else { - PseudoInst::quiesce(xc->tcBase()); + PseudoInst::quiesce(tc); } + tc->getCpuPtr()->clearInterrupt(INT_ABT, 0); ''' wfiIop = InstObjParams("wfi", "WfiInst", "PredOp", \ { "code" : wfiCode, "predicate_test" : predicateTest }, @@ -564,6 +733,16 @@ let {{ decoder_output += BasicConstructor.subst(sevIop) exec_output += PredOpExecute.subst(sevIop) + sevlCode = ''' + SevMailbox = 1; + ''' + sevlIop = InstObjParams("sevl", "SevlInst", "PredOp", \ + { "code" : sevlCode, "predicate_test" : predicateTest }, + ["IsNonSpeculative", "IsSquashAfter", "IsUnverifiable"]) + header_output += BasicDeclare.subst(sevlIop) + decoder_output += BasicConstructor.subst(sevlIop) + exec_output += BasicExecute.subst(sevlIop) + itIop = InstObjParams("it", "ItInst", "PredOp", \ { "code" : ";", "predicate_test" : predicateTest }, []) @@ -571,10 +750,7 @@ let {{ decoder_output += BasicConstructor.subst(itIop) exec_output += PredOpExecute.subst(itIop) unknownCode = ''' - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(machInst, true); + return new UndefinedInstruction(machInst, true); ''' unknownIop = InstObjParams("unknown", "Unknown", "UnknownOp", \ { "code": unknownCode, @@ -626,108 +802,152 @@ let {{ exec_output += PredOpExecute.subst(bfiIop) mrc14code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + MiscRegIndex miscReg = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(op1); + if (!canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (mcrMrc14TrapToHyp((const MiscRegIndex) op1, Hcr, Cpsr, Scr, Hdcr, + Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP14_MCR_MRC); } Dest = MiscOp1; ''' - mrc14Iop = InstObjParams("mrc", "Mrc14", "RegRegOp", + mrc14Iop = InstObjParams("mrc", "Mrc14", "RegRegImmOp", { "code": mrc14code, "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc14Iop) - decoder_output += RegRegOpConstructor.subst(mrc14Iop) + header_output += RegRegImmOpDeclare.subst(mrc14Iop) + decoder_output += RegRegImmOpConstructor.subst(mrc14Iop) exec_output += PredOpExecute.subst(mrc14Iop) mcr14code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + MiscRegIndex miscReg = (MiscRegIndex) xc->tcBase()->flattenMiscIndex(dest); + if (!canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase())) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (mcrMrc14TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, + Hstr, 
Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP14_MCR_MRC); } MiscDest = Op1; ''' - mcr14Iop = InstObjParams("mcr", "Mcr14", "RegRegOp", + mcr14Iop = InstObjParams("mcr", "Mcr14", "RegRegImmOp", { "code": mcr14code, "predicate_test": predicateTest }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr14Iop) - decoder_output += RegRegOpConstructor.subst(mcr14Iop) + header_output += RegRegImmOpDeclare.subst(mcr14Iop) + decoder_output += RegRegImmOpConstructor.subst(mcr14Iop) exec_output += PredOpExecute.subst(mcr14Iop) - mrc14UserIop = InstObjParams("mrc", "Mrc14User", "RegRegOp", - { "code": "Dest = MiscOp1;", - "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc14UserIop) - decoder_output += RegRegOpConstructor.subst(mrc14UserIop) - exec_output += PredOpExecute.subst(mrc14UserIop) - - mcr14UserIop = InstObjParams("mcr", "Mcr14User", "RegRegOp", - { "code": "MiscDest = Op1", - "predicate_test": predicateTest }, - ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr14UserIop) - decoder_output += RegRegOpConstructor.subst(mcr14UserIop) - exec_output += PredOpExecute.subst(mcr14UserIop) - mrc15code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + int preFlatOp1 = flattenMiscRegNsBanked(op1, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatOp1); + bool hypTrap = mcrMrc15TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, Hstr, + Hcptr, imm); + bool canRead = canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // If we're in non-secure PL1 mode then we can trap regardless of whether + // the register is accessible; in other modes we trap only if the register + // IS accessible. + if (!canRead && !(hypTrap && !inUserMode(Cpsr) && !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); } - Dest = MiscOp1; + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCR_MRC); + } + Dest = MiscNsBankedOp1; ''' - mrc15Iop = InstObjParams("mrc", "Mrc15", "RegRegOp", + mrc15Iop = InstObjParams("mrc", "Mrc15", "RegRegImmOp", { "code": mrc15code, "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc15Iop) - decoder_output += RegRegOpConstructor.subst(mrc15Iop) + header_output += RegRegImmOpDeclare.subst(mrc15Iop) + decoder_output += RegRegImmOpConstructor.subst(mrc15Iop) exec_output += PredOpExecute.subst(mrc15Iop) mcr15code = ''' - CPSR cpsr = Cpsr; - if (cpsr.mode == MODE_USER) { - if (FullSystem) - return new UndefinedInstruction; - else - return new UndefinedInstruction(false, mnemonic); + int preFlatDest = flattenMiscRegNsBanked(dest, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatDest); + bool hypTrap = mcrMrc15TrapToHyp(miscReg, Hcr, Cpsr, Scr, Hdcr, Hstr, + Hcptr, imm); + bool canWrite = canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // If we're in non-secure PL1 mode then we can trap regardless of whether + // the register is accessible; in other modes we trap only if the register + // IS accessible.
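+        // Worked example, for illustration only: an MCR from non-secure
+        // PL1 to a register with canWrite == false but hypTrap == true
+        // skips the UndefinedInstruction return (the !inUserMode and
+        // !inSecureState terms both hold) and takes the HypervisorTrap
+        // below; the same access from user mode makes the hypTrap clause
+        // false, so it is UNDEFINED instead.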
+ if (!canWrite && !(hypTrap && !inUserMode(Cpsr) && !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); } - MiscDest = Op1; + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCR_MRC); + } + MiscNsBankedDest = Op1; ''' - mcr15Iop = InstObjParams("mcr", "Mcr15", "RegRegOp", + mcr15Iop = InstObjParams("mcr", "Mcr15", "RegRegImmOp", { "code": mcr15code, "predicate_test": predicateTest }, ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr15Iop) - decoder_output += RegRegOpConstructor.subst(mcr15Iop) + header_output += RegRegImmOpDeclare.subst(mcr15Iop) + decoder_output += RegRegImmOpConstructor.subst(mcr15Iop) exec_output += PredOpExecute.subst(mcr15Iop) - mrc15UserIop = InstObjParams("mrc", "Mrc15User", "RegRegOp", - { "code": "Dest = MiscOp1;", - "predicate_test": predicateTest }, []) - header_output += RegRegOpDeclare.subst(mrc15UserIop) - decoder_output += RegRegOpConstructor.subst(mrc15UserIop) - exec_output += PredOpExecute.subst(mrc15UserIop) - mcr15UserIop = InstObjParams("mcr", "Mcr15User", "RegRegOp", - { "code": "MiscDest = Op1", - "predicate_test": predicateTest }, - ["IsSerializeAfter","IsNonSpeculative"]) - header_output += RegRegOpDeclare.subst(mcr15UserIop) - decoder_output += RegRegOpConstructor.subst(mcr15UserIop) - exec_output += PredOpExecute.subst(mcr15UserIop) + mrrc15code = ''' + int preFlatOp1 = flattenMiscRegNsBanked(op1, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatOp1); + bool hypTrap = mcrrMrrc15TrapToHyp(miscReg, Cpsr, Scr, Hstr, Hcr, imm); + bool canRead = canReadCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // If we're in non-secure PL1 mode then we can trap regardless of whether + // the register is accessible; in other modes we trap only if the register + // IS accessible. + if (!canRead && !(hypTrap && !inUserMode(Cpsr) && !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCRR_MRRC); + } + Dest = bits(MiscNsBankedOp164, 63, 32); + Dest2 = bits(MiscNsBankedOp164, 31, 0); + ''' + mrrc15Iop = InstObjParams("mrrc", "Mrrc15", "MrrcOp", + { "code": mrrc15code, + "predicate_test": predicateTest }, []) + header_output += MrrcOpDeclare.subst(mrrc15Iop) + decoder_output += MrrcOpConstructor.subst(mrrc15Iop) + exec_output += PredOpExecute.subst(mrrc15Iop) + + + mcrr15code = ''' + int preFlatDest = flattenMiscRegNsBanked(dest, xc->tcBase()); + MiscRegIndex miscReg = (MiscRegIndex) + xc->tcBase()->flattenMiscIndex(preFlatDest); + bool hypTrap = mcrrMrrc15TrapToHyp(miscReg, Cpsr, Scr, Hstr, Hcr, imm); + bool canWrite = canWriteCoprocReg(miscReg, Scr, Cpsr, xc->tcBase()); + + // If we're in non-secure PL1 mode then we can trap regardless of whether + // the register is accessible; in other modes we trap only if the register + // IS accessible.
+ if (!canWrite && !(hypTrap && !inUserMode(Cpsr) && !inSecureState(Scr, Cpsr))) { + return new UndefinedInstruction(machInst, false, mnemonic); + } + if (hypTrap) { + return new HypervisorTrap(machInst, imm, EC_TRAPPED_CP15_MCRR_MRRC); + } + MiscNsBankedDest64 = ((uint64_t) Op1 << 32) | Op2; + ''' + mcrr15Iop = InstObjParams("mcrr", "Mcrr15", "McrrOp", + { "code": mcrr15code, + "predicate_test": predicateTest }, []) + header_output += McrrOpDeclare.subst(mcrr15Iop) + decoder_output += McrrOpConstructor.subst(mcrr15Iop) + exec_output += PredOpExecute.subst(mcrr15Iop) + enterxCode = ''' NextThumb = true; @@ -775,35 +995,53 @@ let {{ exec_output += PredOpExecute.subst(clrexIop) isbCode = ''' + // If the barrier is due to a CP15 access, check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15ISB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } fault = new FlushPipe; ''' - isbIop = InstObjParams("isb", "Isb", "PredOp", + isbIop = InstObjParams("isb", "Isb", "ImmOp", {"code": isbCode, "predicate_test": predicateTest}, ['IsSerializeAfter']) - header_output += BasicDeclare.subst(isbIop) - decoder_output += BasicConstructor.subst(isbIop) + header_output += ImmOpDeclare.subst(isbIop) + decoder_output += ImmOpConstructor.subst(isbIop) exec_output += PredOpExecute.subst(isbIop) dsbCode = ''' + // If the barrier is due to a CP15 access, check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15DSB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } fault = new FlushPipe; ''' - dsbIop = InstObjParams("dsb", "Dsb", "PredOp", + dsbIop = InstObjParams("dsb", "Dsb", "ImmOp", {"code": dsbCode, "predicate_test": predicateTest}, ['IsMemBarrier', 'IsSerializeAfter']) - header_output += BasicDeclare.subst(dsbIop) - decoder_output += BasicConstructor.subst(dsbIop) + header_output += ImmOpDeclare.subst(dsbIop) + decoder_output += ImmOpConstructor.subst(dsbIop) exec_output += PredOpExecute.subst(dsbIop) dmbCode = ''' + // If the barrier is due to a CP15 access, check for hyp traps + if ((imm != 0) && mcrMrc15TrapToHyp(MISCREG_CP15DMB, Hcr, Cpsr, Scr, + Hdcr, Hstr, Hcptr, imm)) { + return new HypervisorTrap(machInst, imm, + EC_TRAPPED_CP15_MCR_MRC); + } ''' - dmbIop = InstObjParams("dmb", "Dmb", "PredOp", + dmbIop = InstObjParams("dmb", "Dmb", "ImmOp", {"code": dmbCode, "predicate_test": predicateTest}, ['IsMemBarrier']) - header_output += BasicDeclare.subst(dmbIop) - decoder_output += BasicConstructor.subst(dmbIop) + header_output += ImmOpDeclare.subst(dmbIop) + decoder_output += ImmOpConstructor.subst(dmbIop) exec_output += PredOpExecute.subst(dmbIop) dbgCode = ''' diff --git a/src/arch/arm/isa/insts/misc64.isa b/src/arch/arm/isa/insts/misc64.isa new file mode 100644 index 000000000..6ebbcc2ba --- /dev/null +++ b/src/arch/arm/isa/insts/misc64.isa @@ -0,0 +1,147 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2011-2013 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder.
You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Authors: Gabe Black + +let {{ + svcCode = ''' + fault = new SupervisorCall(machInst, bits(machInst, 20, 5)); + ''' + + svcIop = InstObjParams("svc", "Svc64", "ArmStaticInst", + svcCode, ["IsSyscall", "IsNonSpeculative", + "IsSerializeAfter"]) + header_output = BasicDeclare.subst(svcIop) + decoder_output = BasicConstructor64.subst(svcIop) + exec_output = BasicExecute.subst(svcIop) + + # @todo: extend to take into account Virtualization. 
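
For reference, the Svc64 immediate above is extracted with gem5's bits() helper from base/bitfield.hh: bits(machInst, 20, 5) pulls out the 16-bit immediate that the A64 SVC encoding carries in instruction bits 20:5. A rough standalone equivalent, assuming 32-bit instruction words:

    #include <cstdint>

    // Extract bits first..last (inclusive, first >= last) of val.
    static inline uint32_t
    bits(uint32_t val, int first, int last)
    {
        return (val >> last) & ((1u << (first - last + 1)) - 1);
    }

    // bits(machInst, 20, 5) thus yields the 16-bit SVC immediate.
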
+ smcCode = ''' + SCR scr = Scr64; + CPSR cpsr = Cpsr; + + if (!ArmSystem::haveSecurity(xc->tcBase()) || inUserMode(cpsr) || scr.smd) { + fault = disabledFault(); + } else { + fault = new SecureMonitorCall(machInst); + } + ''' + + smcIop = InstObjParams("smc", "Smc64", "ArmStaticInst", + smcCode, ["IsNonSpeculative", "IsSerializeAfter"]) + header_output += BasicDeclare.subst(smcIop) + decoder_output += BasicConstructor64.subst(smcIop) + exec_output += BasicExecute.subst(smcIop) + + def subst(templateBase, iop): + global header_output, decoder_output, exec_output + header_output += eval(templateBase + "Declare").subst(iop) + decoder_output += eval(templateBase + "Constructor").subst(iop) + exec_output += BasicExecute.subst(iop) + + bfmMaskCode = ''' + uint64_t bitMask; + int diff = imm2 - imm1; + if (imm1 <= imm2) { + bitMask = mask(diff + 1); + } else { + bitMask = mask(imm2 + 1); + bitMask = (bitMask >> imm1) | (bitMask << (intWidth - imm1)); + diff += intWidth; + } + uint64_t topBits M5_VAR_USED = ~mask(diff+1); + uint64_t result = (Op164 >> imm1) | (Op164 << (intWidth - imm1)); + result &= bitMask; + ''' + + bfmCode = bfmMaskCode + 'Dest64 = result | (Dest64 & ~bitMask);' + bfmIop = InstObjParams("bfm", "Bfm64", "RegRegImmImmOp64", bfmCode); + subst("RegRegImmImmOp64", bfmIop) + + ubfmCode = bfmMaskCode + 'Dest64 = result;' + ubfmIop = InstObjParams("ubfm", "Ubfm64", "RegRegImmImmOp64", ubfmCode); + subst("RegRegImmImmOp64", ubfmIop) + + sbfmCode = bfmMaskCode + \ + 'Dest64 = result | (bits(Op164, imm2) ? topBits : 0);' + sbfmIop = InstObjParams("sbfm", "Sbfm64", "RegRegImmImmOp64", sbfmCode); + subst("RegRegImmImmOp64", sbfmIop) + + extrCode = ''' + if (imm == 0) { + Dest64 = Op264; + } else { + Dest64 = (Op164 << (intWidth - imm)) | (Op264 >> imm); + } + ''' + extrIop = InstObjParams("extr", "Extr64", "RegRegRegImmOp64", extrCode); + subst("RegRegRegImmOp64", extrIop); + + unknownCode = ''' + return new UndefinedInstruction(machInst, true); + ''' + unknown64Iop = InstObjParams("unknown", "Unknown64", "UnknownOp64", + unknownCode) + header_output += BasicDeclare.subst(unknown64Iop) + decoder_output += BasicConstructor64.subst(unknown64Iop) + exec_output += BasicExecute.subst(unknown64Iop) + + isbIop = InstObjParams("isb", "Isb64", "ArmStaticInst", + "fault = new FlushPipe;", ['IsSerializeAfter']) + header_output += BasicDeclare.subst(isbIop) + decoder_output += BasicConstructor64.subst(isbIop) + exec_output += BasicExecute.subst(isbIop) + + dsbIop = InstObjParams("dsb", "Dsb64", "ArmStaticInst", + "fault = new FlushPipe;", + ['IsMemBarrier', 'IsSerializeAfter']) + header_output += BasicDeclare.subst(dsbIop) + decoder_output += BasicConstructor64.subst(dsbIop) + exec_output += BasicExecute.subst(dsbIop) + + dmbIop = InstObjParams("dmb", "Dmb64", "ArmStaticInst", "", + ['IsMemBarrier']) + header_output += BasicDeclare.subst(dmbIop) + decoder_output += BasicConstructor64.subst(dmbIop) + exec_output += BasicExecute.subst(dmbIop) + + clrexIop = InstObjParams("clrex", "Clrex64", "ArmStaticInst", + "LLSCLock = 0;") + header_output += BasicDeclare.subst(clrexIop) + decoder_output += BasicConstructor64.subst(clrexIop) + exec_output += BasicExecute.subst(clrexIop) +}}; diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa index 876bb3bb7..ca5c3038c 100644 --- a/src/arch/arm/isa/insts/neon.isa +++ b/src/arch/arm/isa/insts/neon.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010 ARM Limited +// Copyright (c) 2010-2011 ARM Limited // All rights reserved // // The 
license below extends only to copyright in the software and shall @@ -94,8 +94,8 @@ output header {{ template