File 0001-PPC-Implement-Atomic-operations.patch of Package ghc
From d52808aa474ed9f51981ae35814c0a7ec36b5fea Mon Sep 17 00:00:00 2001
From: Peter Trommler <ptrommler@acm.org>
Date: Sat, 16 Sep 2017 13:52:04 +0200
Subject: [PATCH] PPC: Implement Atomic operations.
Fixes #12537
---
compiler/nativeGen/PPC/CodeGen.hs | 91 +++++++++++++++++++++++++++++++++++----
compiler/nativeGen/PPC/Instr.hs | 47 ++++++++++++--------
compiler/nativeGen/PPC/Ppr.hs | 56 ++++++++++++++++++++----
3 files changed, 159 insertions(+), 35 deletions(-)
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index df76211f3d..5ec8459926 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -160,8 +160,8 @@ stmtToInstrs stmt = do
-> genCCall target result_regs args
CmmBranch id -> genBranch id
- CmmCondBranch arg true false _ -> do
- b1 <- genCondJump true arg
+ CmmCondBranch arg true false prediction -> do
+ b1 <- genCondJump true arg prediction
b2 <- genBranch false
return (b1 `appOL` b2)
CmmSwitch arg ids -> do dflags <- getDynFlags
@@ -1069,11 +1069,12 @@ comparison to do.
genCondJump
:: BlockId -- the branch target
-> CmmExpr -- the condition on which to branch
+ -> Maybe Bool
-> NatM InstrBlock
-genCondJump id bool = do
+genCondJump id bool prediction = do
CondCode _ cond code <- getCondCode bool
- return (code `snocOL` BCC cond id)
+ return (code `snocOL` BCC cond id prediction)
@@ -1091,6 +1092,78 @@ genCCall :: ForeignTarget -- function to call
-> [CmmFormal] -- where to put the result
-> [CmmActual] -- arguments (of mixed type)
-> NatM InstrBlock
+genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n]
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ fmt = intFormat width
+ reg_dst = getRegisterReg platform (CmmLocal dst)
+ (instr, n_code) <- case amop of
+ AMO_Add -> getSomeRegOrImm ADD True reg_dst
+ AMO_Sub -> case n of
+ CmmLit (CmmInt i _)
+ | Just imm <- makeImmediate width True (-i)
+ -> return (ADD reg_dst reg_dst (RIImm imm), nilOL)
+ _
+ -> do
+ (n_reg, n_code) <- getSomeReg n
+ return (SUBF reg_dst n_reg reg_dst, n_code)
+ AMO_And -> getSomeRegOrImm AND False reg_dst
+ AMO_Nand -> do (n_reg, n_code) <- getSomeReg n
+ return (NAND reg_dst reg_dst n_reg, n_code)
+ AMO_Or -> getSomeRegOrImm OR False reg_dst
+ AMO_Xor -> getSomeRegOrImm XOR False reg_dst
+ Amode addr_reg addr_code <- getAmodeIndex addr
+ lbl_retry <- getBlockIdNat
+ return $ n_code `appOL` addr_code
+ `appOL` toOL [ HWSYNC
+ , BCC ALWAYS lbl_retry Nothing
+ , NEWBLOCK lbl_retry
+ , LDR fmt reg_dst addr_reg
+ , instr
+ , STC fmt reg_dst addr_reg
+ , BCC NE lbl_retry (Just False)
+ , ISYNC
+ ]
+ where
+ getAmodeIndex (CmmMachOp (MO_Add _) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
+ getAmodeIndex other
+ = do
+ (reg, code) <- getSomeReg other
+ return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here!
+ getSomeRegOrImm op sign dst
+ = case n of
+ CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i
+ -> return (op dst dst (RIImm imm), nilOL)
+ _
+ -> do
+ (n_reg, n_code) <- getSomeReg n
+ return (op dst dst (RIReg n_reg), n_code)
+
+genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ fmt = intFormat width
+ reg_dst = getRegisterReg platform (CmmLocal dst)
+ form = if widthInBits width == 64 then DS else D
+ Amode addr_reg addr_code <- getAmode form addr
+ lbl_end <- getBlockIdNat
+ return $ addr_code `appOL` toOL [ HWSYNC
+ , LD fmt reg_dst addr_reg
+ , CMP fmt reg_dst (RIReg reg_dst)
+ , BCC NE lbl_end (Just False)
+ , BCC ALWAYS lbl_end Nothing
+ , NEWBLOCK lbl_end
+ , ISYNC
+ ]
+
+genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
+ code <- assignMem_IntCode (intFormat width) addr val
+ return $ unitOL(HWSYNC) `appOL` code
+
genCCall target dest_regs argsAndHints
= do dflags <- getDynFlags
let platform = targetPlatform dflags
@@ -1555,11 +1628,11 @@ genCCall' dflags gcp target dest_regs args
MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
MO_Clz w -> (fsLit $ clzLabel w, False)
- MO_Ctz w -> (fsLit $ ctzLabel w, False)
- MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False)
+ MO_Ctz w -> (fsLit $ clzLabel w, False)
+ MO_AtomicRMW {} -> unsupported
MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
- MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False)
- MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False)
+ MO_AtomicRead _ -> unsupported
+ MO_AtomicWrite _ -> unsupported
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
@@ -1571,7 +1644,7 @@ genCCall' dflags gcp target dest_regs args
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
- (MO_Prefetch_Data _ ) -> unsupported
+ MO_Prefetch_Data _ -> unsupported
unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported")
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index 6baeb6c343..1315ad5a19 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -122,7 +122,7 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
insert_stack_insns (BasicBlock id insns)
| Just new_blockid <- mapLookup id new_blockmap
- = [ BasicBlock id [alloc, BCC ALWAYS new_blockid]
+ = [ BasicBlock id [alloc, BCC ALWAYS new_blockid Nothing]
, BasicBlock new_blockid block'
]
| otherwise
@@ -138,8 +138,8 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
JMP _ -> dealloc : insn : r
BCTR [] Nothing -> dealloc : insn : r
BCTR ids label -> BCTR (map (fmap retarget) ids) label : r
- BCCFAR cond b -> BCCFAR cond (retarget b) : r
- BCC cond b -> BCC cond (retarget b) : r
+ BCCFAR cond b p -> BCCFAR cond (retarget b) p : r
+ BCC cond b p -> BCC cond (retarget b) p : r
_ -> insn : r
-- BL and BCTRL are call-like instructions rather than
-- jumps, and are used only for C calls.
@@ -188,10 +188,12 @@ data Instr
-- Loads and stores.
| LD Format Reg AddrMode -- Load format, dst, src
| LDFAR Format Reg AddrMode -- Load format, dst, src 32 bit offset
+ | LDR Format Reg AddrMode -- Load and reserve format, dst, src
| LA Format Reg AddrMode -- Load arithmetic format, dst, src
| ST Format Reg AddrMode -- Store format, src, dst
| STFAR Format Reg AddrMode -- Store format, src, dst 32 bit offset
| STU Format Reg AddrMode -- Store with Update format, src, dst
+ | STC Format Reg AddrMode -- Store conditional format, src, dst
| LIS Reg Imm -- Load Immediate Shifted dst, src
| LI Reg Imm -- Load Immediate dst, src
| MR Reg Reg -- Move Register dst, src -- also for fmr
@@ -199,8 +201,8 @@ data Instr
| CMP Format Reg RI -- format, src1, src2
| CMPL Format Reg RI -- format, src1, src2
- | BCC Cond BlockId
- | BCCFAR Cond BlockId
+ | BCC Cond BlockId (Maybe Bool)
+ | BCCFAR Cond BlockId (Maybe Bool)
| JMP CLabel -- same as branch,
-- but with CLabel instead of block ID
| MTCTR Reg
@@ -237,6 +239,7 @@ data Instr
-- rlwinm dst, dst, 2, 31,31
| AND Reg Reg RI -- dst, src1, src2
+ | NAND Reg Reg Reg -- dst, src1, src2
| OR Reg Reg RI -- dst, src1, src2
| ORIS Reg Reg Imm -- OR Immediate Shifted dst, src1, src2
| XOR Reg Reg RI -- dst, src1, src2
@@ -276,6 +279,8 @@ data Instr
| FETCHTOC Reg CLabel -- pseudo-instruction
-- add TOC offset to address in r12
-- print .localentry for label
+ | HWSYNC -- heavy weight sync
+ | ISYNC -- instruction synchronize
| LWSYNC -- memory barrier
| NOP -- no operation, PowerPC 64 bit
-- needs this as place holder to
@@ -294,17 +299,19 @@ ppc_regUsageOfInstr platform instr
= case instr of
LD _ reg addr -> usage (regAddr addr, [reg])
LDFAR _ reg addr -> usage (regAddr addr, [reg])
+ LDR _ reg addr -> usage (regAddr addr, [reg])
LA _ reg addr -> usage (regAddr addr, [reg])
ST _ reg addr -> usage (reg : regAddr addr, [])
STFAR _ reg addr -> usage (reg : regAddr addr, [])
STU _ reg addr -> usage (reg : regAddr addr, [])
+ STC _ reg addr -> usage (reg : regAddr addr, [])
LIS reg _ -> usage ([], [reg])
LI reg _ -> usage ([], [reg])
MR reg1 reg2 -> usage ([reg2], [reg1])
CMP _ reg ri -> usage (reg : regRI ri,[])
CMPL _ reg ri -> usage (reg : regRI ri,[])
- BCC _ _ -> noUsage
- BCCFAR _ _ -> noUsage
+ BCC _ _ _ -> noUsage
+ BCCFAR _ _ _ -> noUsage
MTCTR reg -> usage ([reg],[])
BCTR _ _ -> noUsage
BL _ params -> usage (params, callClobberedRegs platform)
@@ -330,6 +337,7 @@ ppc_regUsageOfInstr platform instr
MULLD_MayOflo reg1 reg2 reg3
-> usage ([reg2,reg3], [reg1])
AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ NAND reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
ORIS reg1 reg2 _ -> usage ([reg2], [reg1])
XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
@@ -382,17 +390,19 @@ ppc_patchRegsOfInstr instr env
= case instr of
LD fmt reg addr -> LD fmt (env reg) (fixAddr addr)
LDFAR fmt reg addr -> LDFAR fmt (env reg) (fixAddr addr)
+ LDR fmt reg addr -> LDR fmt (env reg) (fixAddr addr)
LA fmt reg addr -> LA fmt (env reg) (fixAddr addr)
ST fmt reg addr -> ST fmt (env reg) (fixAddr addr)
STFAR fmt reg addr -> STFAR fmt (env reg) (fixAddr addr)
STU fmt reg addr -> STU fmt (env reg) (fixAddr addr)
+ STC fmt reg addr -> STC fmt (env reg) (fixAddr addr)
LIS reg imm -> LIS (env reg) imm
LI reg imm -> LI (env reg) imm
MR reg1 reg2 -> MR (env reg1) (env reg2)
CMP fmt reg ri -> CMP fmt (env reg) (fixRI ri)
CMPL fmt reg ri -> CMPL fmt (env reg) (fixRI ri)
- BCC cond lbl -> BCC cond lbl
- BCCFAR cond lbl -> BCCFAR cond lbl
+ BCC cond lbl p -> BCC cond lbl p
+ BCCFAR cond lbl p -> BCCFAR cond lbl p
MTCTR reg -> MTCTR (env reg)
BCTR targets lbl -> BCTR targets lbl
BL imm argRegs -> BL imm argRegs -- argument regs
@@ -416,6 +426,7 @@ ppc_patchRegsOfInstr instr env
MULLD_MayOflo reg1 reg2 reg3
-> MULLD_MayOflo (env reg1) (env reg2) (env reg3)
AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri)
+ NAND reg1 reg2 reg3 -> NAND (env reg1) (env reg2) (env reg3)
OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri)
ORIS reg1 reg2 imm -> ORIS (env reg1) (env reg2) imm
XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri)
@@ -476,8 +487,8 @@ ppc_isJumpishInstr instr
ppc_jumpDestsOfInstr :: Instr -> [BlockId]
ppc_jumpDestsOfInstr insn
= case insn of
- BCC _ id -> [id]
- BCCFAR _ id -> [id]
+ BCC _ id _ -> [id]
+ BCCFAR _ id _ -> [id]
BCTR targets _ -> [id | Just id <- targets]
_ -> []
@@ -488,8 +499,8 @@ ppc_jumpDestsOfInstr insn
ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
ppc_patchJumpInstr insn patchF
= case insn of
- BCC cc id -> BCC cc (patchF id)
- BCCFAR cc id -> BCCFAR cc (patchF id)
+ BCC cc id p -> BCC cc (patchF id) p
+ BCCFAR cc id p -> BCCFAR cc (patchF id) p
BCTR ids lbl -> BCTR (map (fmap patchF) ids) lbl
_ -> insn
@@ -636,7 +647,7 @@ ppc_mkJumpInstr
-> [Instr]
ppc_mkJumpInstr id
- = [BCC ALWAYS id]
+ = [BCC ALWAYS id Nothing]
-- | Take the source and destination from this reg -> reg move instruction
@@ -665,12 +676,12 @@ makeFarBranches info_env blocks
handleBlock addr (BasicBlock id instrs)
= BasicBlock id (zipWith makeFar [addr..] instrs)
- makeFar _ (BCC ALWAYS tgt) = BCC ALWAYS tgt
- makeFar addr (BCC cond tgt)
+ makeFar _ (BCC ALWAYS tgt _) = BCC ALWAYS tgt Nothing
+ makeFar addr (BCC cond tgt p)
| abs (addr - targetAddr) >= nearLimit
- = BCCFAR cond tgt
+ = BCCFAR cond tgt p
| otherwise
- = BCC cond tgt
+ = BCC cond tgt p
where Just targetAddr = lookupUFM blockAddressMap tgt
makeFar _ other = other
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
index b7d2196f44..8b7df65ff1 100644
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -309,11 +309,13 @@ pprImm (HIGHESTA i)
pprAddr :: AddrMode -> SDoc
pprAddr (AddrRegReg r1 r2)
- = pprReg r1 <+> text ", " <+> pprReg r2
-
-pprAddr (AddrRegImm r1 (ImmInt i)) = hcat [ int i, char '(', pprReg r1, char ')' ]
-pprAddr (AddrRegImm r1 (ImmInteger i)) = hcat [ integer i, char '(', pprReg r1, char ')' ]
-pprAddr (AddrRegImm r1 imm) = hcat [ pprImm imm, char '(', pprReg r1, char ')' ]
+ = pprReg r1 <> char ',' <+> pprReg r2
+pprAddr (AddrRegImm r1 (ImmInt i))
+ = hcat [ int i, char '(', pprReg r1, char ')' ]
+pprAddr (AddrRegImm r1 (ImmInteger i))
+ = hcat [ integer i, char '(', pprReg r1, char ')' ]
+pprAddr (AddrRegImm r1 imm)
+ = hcat [ pprImm imm, char '(', pprReg r1, char ')' ]
pprSectionAlign :: Section -> SDoc
@@ -443,15 +445,27 @@ pprInstr (LD fmt reg addr) = hcat [
text ", ",
pprAddr addr
]
+
pprInstr (LDFAR fmt reg (AddrRegImm source off)) =
sdocWithPlatform $ \platform -> vcat [
pprInstr (ADDIS (tmpReg platform) source (HA off)),
pprInstr (LD fmt reg (AddrRegImm (tmpReg platform) (LO off)))
]
-
pprInstr (LDFAR _ _ _) =
panic "PPC.Ppr.pprInstr LDFAR: no match"
+pprInstr (LDR fmt reg1 addr) = hcat [
+ text "\tl",
+ case fmt of
+ II32 -> char 'w'
+ II64 -> char 'd'
+ _ -> panic "PPC.Ppr.Instr LDR: no match",
+ text "arx\t",
+ pprReg reg1,
+ text ", ",
+ pprAddr addr
+ ]
+
pprInstr (LA fmt reg addr) = hcat [
char '\t',
text "l",
@@ -502,6 +516,17 @@ pprInstr (STU fmt reg addr) = hcat [
text ", ",
pprAddr addr
]
+pprInstr (STC fmt reg1 addr) = hcat [
+ text "\tst",
+ case fmt of
+ II32 -> char 'w'
+ II64 -> char 'd'
+ _ -> panic "PPC.Ppr.Instr LDR: no match",
+ text "cx.\t",
+ pprReg reg1,
+ text ", ",
+ pprAddr addr
+ ]
pprInstr (LIS reg imm) = hcat [
char '\t',
text "lis",
@@ -563,19 +588,25 @@ pprInstr (CMPL fmt reg ri) = hcat [
RIReg _ -> empty
RIImm _ -> char 'i'
]
-pprInstr (BCC cond blockid) = hcat [
+pprInstr (BCC cond blockid prediction) = hcat [
char '\t',
text "b",
pprCond cond,
+ pprPrediction prediction,
char '\t',
ppr lbl
]
where lbl = mkAsmTempLabel (getUnique blockid)
+ pprPrediction p = case p of
+ Nothing -> empty
+ Just True -> char '+'
+ Just False -> char '-'
-pprInstr (BCCFAR cond blockid) = vcat [
+pprInstr (BCCFAR cond blockid prediction) = vcat [
hcat [
text "\tb",
pprCond (condNegate cond),
+ neg_prediction,
text "\t$+8"
],
hcat [
@@ -584,6 +615,10 @@ pprInstr (BCCFAR cond blockid) = vcat [
]
]
where lbl = mkAsmTempLabel (getUnique blockid)
+ neg_prediction = case prediction of
+ Nothing -> empty
+ Just True -> char '-'
+ Just False -> char '+'
pprInstr (JMP lbl)
-- We never jump to ForeignLabels; if we ever do, c.f. handling for "BL"
@@ -699,6 +734,7 @@ pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [
pprImm imm
]
pprInstr (AND reg1 reg2 ri) = pprLogic (sLit "and") reg1 reg2 ri
+pprInstr (NAND reg1 reg2 reg3) = pprLogic (sLit "nand") reg1 reg2 (RIReg reg3)
pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri
pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri
@@ -858,6 +894,10 @@ pprInstr (FETCHTOC reg lab) = vcat [
ppr lab]
]
+pprInstr HWSYNC = text "\tsync"
+
+pprInstr ISYNC = text "\tisync"
+
pprInstr LWSYNC = text "\tlwsync"
pprInstr NOP = text "\tnop"
--
2.12.3