From 582ea4d5431f9fa9edbeb16835b04171647ea18b Mon Sep 17 00:00:00 2001 From: Nilay Vaish Date: Thu, 3 Nov 2011 22:52:21 -0500 Subject: [PATCH] x86: Add microop for fence This patch adds a new microop for memory barrier. The microop itself does nothing, but since it is marked as a memory barrier, the O3 CPU should flush all the pending loads and stores before the fence to the memory system. --- .../arithmetic/add_and_subtract.py | 36 +++++++++++++ .../arithmetic/increment_and_decrement.py | 8 +++ .../compare_and_test/bit_test.py | 24 +++++++++ .../general_purpose/data_transfer/xchg.py | 12 +++++ .../x86/isa/insts/general_purpose/logical.py | 28 ++++++++++ .../isa/insts/general_purpose/semaphores.py | 8 +++ src/arch/x86/isa/microops/specop.isa | 53 +++++++++++++++++++ 7 files changed, 169 insertions(+) diff --git a/src/arch/x86/isa/insts/general_purpose/arithmetic/add_and_subtract.py b/src/arch/x86/isa/insts/general_purpose/arithmetic/add_and_subtract.py index 9fc3e9035..68031c76c 100644 --- a/src/arch/x86/isa/insts/general_purpose/arithmetic/add_and_subtract.py +++ b/src/arch/x86/isa/insts/general_purpose/arithmetic/add_and_subtract.py @@ -67,18 +67,22 @@ def macroop ADD_P_I def macroop ADD_LOCKED_M_I { limm t2, imm + mfence ldstl t1, seg, sib, disp add t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop ADD_LOCKED_P_I { rdip t7 limm t2, imm + mfence ldstl t1, seg, riprel, disp add t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop ADD_M_R @@ -98,17 +102,21 @@ def macroop ADD_P_R def macroop ADD_LOCKED_M_R { + mfence ldstl t1, seg, sib, disp add t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop ADD_LOCKED_P_R { rdip t7 + mfence ldstl t1, seg, riprel, disp add t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop ADD_R_M @@ -168,18 +176,22 @@ def macroop SUB_P_I def macroop SUB_LOCKED_M_I { limm t2, imm + mfence ldstl t1, seg, sib, disp sub t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop SUB_LOCKED_P_I { rdip t7 limm t2, imm + mfence ldstl t1, seg, riprel, disp sub t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop SUB_M_R @@ -199,17 +211,21 @@ def macroop SUB_P_R def macroop SUB_LOCKED_M_R { + mfence ldstl t1, seg, sib, disp sub t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop SUB_LOCKED_P_R { rdip t7 + mfence ldstl t1, seg, riprel, disp sub t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop ADC_R_R @@ -243,18 +259,22 @@ def macroop ADC_P_I def macroop ADC_LOCKED_M_I { limm t2, imm + mfence ldstl t1, seg, sib, disp adc t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop ADC_LOCKED_P_I { rdip t7 limm t2, imm + mfence ldstl t1, seg, riprel, disp adc t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop ADC_M_R @@ -274,17 +294,21 @@ def macroop ADC_P_R def macroop ADC_LOCKED_M_R { + mfence ldstl t1, seg, sib, disp adc t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop ADC_LOCKED_P_R { rdip t7 + mfence ldstl t1, seg, riprel, disp adc t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop ADC_R_M @@ -344,18 +368,22 @@ def macroop SBB_P_I def macroop SBB_LOCKED_M_I { limm t2, imm + mfence ldstl t1, seg, sib, disp sbb t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop SBB_LOCKED_P_I { rdip t7 limm t2, imm + mfence ldstl t1, seg, riprel, disp sbb t1, t1, t2, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop SBB_M_R @@ -375,17 +403,21 @@ def macroop SBB_P_R def macroop SBB_LOCKED_M_R { + mfence ldstl t1, seg, sib, disp sbb t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop SBB_LOCKED_P_R { rdip t7 + mfence ldstl t1, seg, riprel, disp sbb t1, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop NEG_R @@ -410,16 +442,20 @@ def macroop NEG_P def macroop NEG_LOCKED_M { + mfence ldstl t1, seg, sib, disp sub t1, t0, t1, flags=(CF,OF,SF,ZF,AF,PF) stul t1, seg, sib, disp + mfence }; def macroop NEG_LOCKED_P { rdip t7 + mfence ldstl t1, seg, riprel, disp sub t1, t0, t1, flags=(CF,OF,SF,ZF,AF,PF) stul t1, seg, riprel, disp + mfence }; ''' diff --git a/src/arch/x86/isa/insts/general_purpose/arithmetic/increment_and_decrement.py b/src/arch/x86/isa/insts/general_purpose/arithmetic/increment_and_decrement.py index f27cd7008..515082d64 100644 --- a/src/arch/x86/isa/insts/general_purpose/arithmetic/increment_and_decrement.py +++ b/src/arch/x86/isa/insts/general_purpose/arithmetic/increment_and_decrement.py @@ -58,17 +58,21 @@ def macroop INC_P def macroop INC_LOCKED_M { + mfence ldstl t1, seg, sib, disp addi t1, t1, 1, flags=(OF, SF, ZF, AF, PF) stul t1, seg, sib, disp + mfence }; def macroop INC_LOCKED_P { rdip t7 + mfence ldstl t1, seg, riprel, disp addi t1, t1, 1, flags=(OF, SF, ZF, AF, PF) stul t1, seg, riprel, disp + mfence }; def macroop DEC_R @@ -93,16 +97,20 @@ def macroop DEC_P def macroop DEC_LOCKED_M { + mfence ldstl t1, seg, sib, disp subi t1, t1, 1, flags=(OF, SF, ZF, AF, PF) stul t1, seg, sib, disp + mfence }; def macroop DEC_LOCKED_P { rdip t7 + mfence ldstl t1, seg, riprel, disp subi t1, t1, 1, flags=(OF, SF, ZF, AF, PF) stul t1, seg, riprel, disp + mfence }; ''' diff --git a/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py b/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py index 66eb0f8a2..f69e1dc48 100644 --- a/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py +++ b/src/arch/x86/isa/insts/general_purpose/compare_and_test/bit_test.py @@ -114,10 +114,12 @@ def macroop BTC_LOCKED_M_I { limm t1, imm, dataSize=asz limm t4, 1 roli t4, t4, imm + mfence ldstl t1, seg, sib, disp sexti t0, t1, imm, flags=(CF,) xor t1, t1, t4 stul t1, seg, sib, disp + mfence }; def macroop BTC_LOCKED_P_I { @@ -125,10 +127,12 @@ def macroop BTC_LOCKED_P_I { limm t1, imm, dataSize=asz limm t4, 1 roli t4, t4, imm + mfence ldstl t1, seg, riprel, disp sexti t0, t1, imm, flags=(CF,) xor t1, t1, t4 stul t1, seg, riprel, disp + mfence }; def macroop BTC_R_R { @@ -168,10 +172,12 @@ def macroop BTC_LOCKED_M_R { lea t3, flatseg, [dsz, t3, base], dataSize=asz limm t4, 1 rol t4, t4, reg + mfence ldstl t1, seg, [scale, index, t3], disp sext t0, t1, reg, flags=(CF,) xor t1, t1, t4 stul t1, seg, [scale, index, t3], disp + mfence }; def macroop BTC_LOCKED_P_R { @@ -180,10 +186,12 @@ def macroop BTC_LOCKED_P_R { srai t3, t2, ldsz, dataSize=asz limm t4, 1 rol t4, t4, reg + mfence ldstl t1, seg, [dsz, t3, t7], disp sext t0, t1, reg, flags=(CF,) xor t1, t1, t4 stul t1, seg, [dsz, t3, t7], disp + mfence }; def macroop BTR_R_I { @@ -218,10 +226,12 @@ def macroop BTR_LOCKED_M_I { limm t1, imm, dataSize=asz limm t4, "(uint64_t(-(2ULL)))" roli t4, t4, imm + mfence ldstl t1, seg, sib, disp sexti t0, t1, imm, flags=(CF,) and t1, t1, t4 stul t1, seg, sib, disp + mfence }; def macroop BTR_LOCKED_P_I { @@ -229,10 +239,12 @@ def macroop BTR_LOCKED_P_I { limm t1, imm, dataSize=asz limm t4, "(uint64_t(-(2ULL)))" roli t4, t4, imm + mfence ldstl t1, seg, riprel, disp sexti t0, t1, imm, flags=(CF,) and t1, t1, t4 stul t1, seg, riprel, disp + mfence }; def macroop BTR_R_R { @@ -272,10 +284,12 @@ def macroop BTR_LOCKED_M_R { lea t3, flatseg, [dsz, t3, base], dataSize=asz limm t4, "(uint64_t(-(2ULL)))" rol t4, t4, reg + mfence ldstl t1, seg, [scale, index, t3], disp sext t0, t1, reg, flags=(CF,) and t1, t1, t4 stul t1, seg, [scale, index, t3], disp + mfence }; def macroop BTR_LOCKED_P_R { @@ -284,10 +298,12 @@ def macroop BTR_LOCKED_P_R { srai t3, t2, ldsz, dataSize=asz limm t4, "(uint64_t(-(2ULL)))" rol t4, t4, reg + mfence ldstl t1, seg, [dsz, t3, t7], disp sext t0, t1, reg, flags=(CF,) and t1, t1, t4 stul t1, seg, [dsz, t3, t7], disp + mfence }; def macroop BTS_R_I { @@ -322,10 +338,12 @@ def macroop BTS_LOCKED_M_I { limm t1, imm, dataSize=asz limm t4, 1 roli t4, t4, imm + mfence ldstl t1, seg, sib, disp sexti t0, t1, imm, flags=(CF,) or t1, t1, t4 stul t1, seg, sib, disp + mfence }; def macroop BTS_LOCKED_P_I { @@ -333,10 +351,12 @@ def macroop BTS_LOCKED_P_I { limm t1, imm, dataSize=asz limm t4, 1 roli t4, t4, imm + mfence ldstl t1, seg, riprel, disp sexti t0, t1, imm, flags=(CF,) or t1, t1, t4 stul t1, seg, riprel, disp + mfence }; def macroop BTS_R_R { @@ -377,10 +397,12 @@ def macroop BTS_LOCKED_M_R { lea t3, flatseg, [dsz, t3, base], dataSize=asz limm t4, 1 rol t4, t4, reg + mfence ldstl t1, seg, [scale, index, t3], disp sext t0, t1, reg, flags=(CF,) or t1, t1, t4 stul t1, seg, [scale, index, t3], disp + mfence }; def macroop BTS_LOCKED_P_R { @@ -390,9 +412,11 @@ def macroop BTS_LOCKED_P_R { lea t3, flatseg, [dsz, t3, base], dataSize=asz limm t4, 1 rol t4, t4, reg + mfence ldstl t1, seg, [1, t3, t7], disp sext t0, t1, reg, flags=(CF,) or t1, t1, t4 stul t1, seg, [1, t3, t7], disp + mfence }; ''' diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py index 6504b5ab4..1518ce5e0 100644 --- a/src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py +++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/xchg.py @@ -50,46 +50,58 @@ def macroop XCHG_R_R def macroop XCHG_R_M { + mfence ldstl t1, seg, sib, disp stul reg, seg, sib, disp + mfence mov reg, reg, t1 }; def macroop XCHG_R_P { rdip t7 + mfence ldstl t1, seg, riprel, disp stul reg, seg, riprel, disp + mfence mov reg, reg, t1 }; def macroop XCHG_M_R { + mfence ldstl t1, seg, sib, disp stul reg, seg, sib, disp + mfence mov reg, reg, t1 }; def macroop XCHG_P_R { rdip t7 + mfence ldstl t1, seg, riprel, disp stul reg, seg, riprel, disp + mfence mov reg, reg, t1 }; def macroop XCHG_LOCKED_M_R { + mfence ldstl t1, seg, sib, disp stul reg, seg, sib, disp + mfence mov reg, reg, t1 }; def macroop XCHG_LOCKED_P_R { rdip t7 + mfence ldstl t1, seg, riprel, disp stul reg, seg, riprel, disp + mfence mov reg, reg, t1 }; ''' diff --git a/src/arch/x86/isa/insts/general_purpose/logical.py b/src/arch/x86/isa/insts/general_purpose/logical.py index b8d442a02..49dea86e5 100644 --- a/src/arch/x86/isa/insts/general_purpose/logical.py +++ b/src/arch/x86/isa/insts/general_purpose/logical.py @@ -61,18 +61,22 @@ def macroop OR_P_I def macroop OR_LOCKED_M_I { limm t2, imm + mfence ldstl t1, seg, sib, disp or t1, t1, t2, flags=(OF,SF,ZF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop OR_LOCKED_P_I { limm t2, imm rdip t7 + mfence ldstl t1, seg, riprel, disp or t1, t1, t2, flags=(OF,SF,ZF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop OR_M_R @@ -92,17 +96,21 @@ def macroop OR_P_R def macroop OR_LOCKED_M_R { + mfence ldstl t1, seg, sib, disp or t1, t1, reg, flags=(OF,SF,ZF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop OR_LOCKED_P_R { rdip t7 + mfence ldstl t1, seg, riprel, disp or t1, t1, reg, flags=(OF,SF,ZF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop OR_R_M @@ -155,18 +163,22 @@ def macroop XOR_P_I def macroop XOR_LOCKED_M_I { limm t2, imm + mfence ldstl t1, seg, sib, disp xor t1, t1, t2, flags=(OF,SF,ZF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop XOR_LOCKED_P_I { limm t2, imm rdip t7 + mfence ldstl t1, seg, riprel, disp xor t1, t1, t2, flags=(OF,SF,ZF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop XOR_M_R @@ -186,17 +198,21 @@ def macroop XOR_P_R def macroop XOR_LOCKED_M_R { + mfence ldstl t1, seg, sib, disp xor t1, t1, reg, flags=(OF,SF,ZF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop XOR_LOCKED_P_R { rdip t7 + mfence ldstl t1, seg, riprel, disp xor t1, t1, reg, flags=(OF,SF,ZF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop XOR_R_M @@ -255,19 +271,23 @@ def macroop AND_P_I def macroop AND_LOCKED_M_I { + mfence ldstl t2, seg, sib, disp limm t1, imm and t2, t2, t1, flags=(OF,SF,ZF,PF,CF) stul t2, seg, sib, disp + mfence }; def macroop AND_LOCKED_P_I { rdip t7 + mfence ldstl t2, seg, riprel, disp limm t1, imm and t2, t2, t1, flags=(OF,SF,ZF,PF,CF) stul t2, seg, riprel, disp + mfence }; def macroop AND_M_R @@ -287,17 +307,21 @@ def macroop AND_P_R def macroop AND_LOCKED_M_R { + mfence ldstl t1, seg, sib, disp and t1, t1, reg, flags=(OF,SF,ZF,PF,CF) stul t1, seg, sib, disp + mfence }; def macroop AND_LOCKED_P_R { rdip t7 + mfence ldstl t1, seg, riprel, disp and t1, t1, reg, flags=(OF,SF,ZF,PF,CF) stul t1, seg, riprel, disp + mfence }; def macroop NOT_R @@ -326,17 +350,21 @@ def macroop NOT_P def macroop NOT_LOCKED_M { limm t1, -1 + mfence ldstl t2, seg, sib, disp xor t2, t2, t1 stul t2, seg, sib, disp + mfence }; def macroop NOT_LOCKED_P { limm t1, -1 rdip t7 + mfence ldstl t2, seg, riprel, disp xor t2, t2, t1 stul t2, seg, riprel, disp + mfence }; ''' diff --git a/src/arch/x86/isa/insts/general_purpose/semaphores.py b/src/arch/x86/isa/insts/general_purpose/semaphores.py index 072e28de6..17bee7fb7 100644 --- a/src/arch/x86/isa/insts/general_purpose/semaphores.py +++ b/src/arch/x86/isa/insts/general_purpose/semaphores.py @@ -62,21 +62,25 @@ def macroop CMPXCHG_P_R { }; def macroop CMPXCHG_LOCKED_M_R { + mfence ldstl t1, seg, sib, disp sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) mov t1, t1, reg, flags=(CZF,) stul t1, seg, sib, disp + mfence mov rax, rax, t1, flags=(nCZF,) }; def macroop CMPXCHG_LOCKED_P_R { rdip t7 + mfence ldstl t1, seg, riprel, disp sub t0, rax, t1, flags=(OF, SF, ZF, AF, PF, CF) mov t1, t1, reg, flags=(CZF,) stul t1, seg, riprel, disp + mfence mov rax, rax, t1, flags=(nCZF,) }; @@ -96,17 +100,21 @@ def macroop XADD_P_R { }; def macroop XADD_LOCKED_M_R { + mfence ldstl t1, seg, sib, disp add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) stul t2, seg, sib, disp + mfence mov reg, reg, t1 }; def macroop XADD_LOCKED_P_R { rdip t7 + mfence ldstl t1, seg, riprel, disp add t2, t1, reg, flags=(OF,SF,ZF,AF,PF,CF) stul t2, seg, riprel, disp + mfence mov reg, reg, t1 }; diff --git a/src/arch/x86/isa/microops/specop.isa b/src/arch/x86/isa/microops/specop.isa index 52420f175..5c242e2c9 100644 --- a/src/arch/x86/isa/microops/specop.isa +++ b/src/arch/x86/isa/microops/specop.isa @@ -1,4 +1,5 @@ // Copyright (c) 2007-2008 The Hewlett-Packard Development Company +// Copyright (c) 2011 Mark D. Hill and David A. Wood // All rights reserved. // // The license below extends only to copyright in the software and shall @@ -203,3 +204,55 @@ let {{ microopClasses["halt"] = Halt }}; + +def template MicroFenceOpDeclare {{ + class %(class_name)s : public X86ISA::X86MicroopBase + { + public: + %(class_name)s(ExtMachInst _machInst, + const char * instMnem, + uint64_t setFlags); + + %(BasicExecDeclare)s + }; +}}; + +def template MicroFenceOpConstructor {{ + inline %(class_name)s::%(class_name)s( + ExtMachInst machInst, const char * instMnem, uint64_t setFlags) : + %(base_class)s(machInst, "%(mnemonic)s", instMnem, + setFlags, %(op_class)s) + { + %(constructor)s; + } +}}; + +let {{ + class MfenceOp(X86Microop): + def __init__(self): + self.className = "Mfence" + self.mnemonic = "mfence" + self.instFlags = "| (1ULL << StaticInst::IsMemBarrier)" + + def getAllocator(self, microFlags): + allocString = ''' + (StaticInstPtr)(new %(class_name)s(machInst, + macrocodeBlock, %(flags)s)) + ''' + allocator = allocString % { + "class_name" : self.className, + "mnemonic" : self.mnemonic, + "flags" : self.microFlagsText(microFlags) + self.instFlags} + return allocator + + microopClasses["mfence"] = MfenceOp +}}; + +let {{ + # Build up the all register version of this micro op + iop = InstObjParams("mfence", "Mfence", 'X86MicroopBase', + {"code" : ""}) + header_output += MicroFenceOpDeclare.subst(iop) + decoder_output += MicroFenceOpConstructor.subst(iop) + exec_output += BasicExecute.subst(iop) +}};