File ghc.git-b29f20.patch of Package ghc

From b29f20edb1ca7f1763ceb001e2bb2d5f2f11bec3 Mon Sep 17 00:00:00 2001
From: Peter Trommler <ptrommler@acm.org>
Date: Fri, 2 Oct 2015 15:48:30 +0200
Subject: [PATCH] nativeGen PPC: fix > 16 bit offsets in stack handling

Implement access to spill slots at offsets larger than 16 bits.
Allocation and deallocation of spill slots were also restricted to
16 bit offsets. Now 32 bit offsets are supported on all PowerPC
platforms.

The implementation of 32 bit offsets requires more than one instruction
but the native code generator wants one instruction. So we implement
pseudo-instructions that are pretty printed into multiple assembly
instructions.

With pseudo-instructions for spill slot allocation and deallocation
we can also implement handling of the back chain pointer according
to the ELF ABIs.

Test Plan: validate (especially on powerpc (32 bit))

Reviewers: bgamari, austin, erikd

Reviewed By: erikd

Subscribers: thomie

Differential Revision: https://phabricator.haskell.org/D1296

GHC Trac Issues: #7830
---
 compiler/nativeGen/PPC/Instr.hs | 39 ++++++++++++++++++++++++++++-----------
 compiler/nativeGen/PPC/Ppr.hs   | 33 +++++++++++++++++++++++++++++++++
 compiler/nativeGen/PPC/Regs.hs  | 14 ++++++++++++--
 includes/CodeGen.Platform.hs    | 18 +++++++++++++-----
 4 files changed, 86 insertions(+), 18 deletions(-)

Index: ghc-7.10.2/compiler/nativeGen/PPC/Instr.hs
===================================================================
--- ghc-7.10.2.orig/compiler/nativeGen/PPC/Instr.hs
+++ ghc-7.10.2/compiler/nativeGen/PPC/Instr.hs
@@ -75,19 +75,19 @@ instance Instruction Instr where
 
 ppc_mkStackAllocInstr :: Platform -> Int -> Instr
 ppc_mkStackAllocInstr platform amount
-  = case platformArch platform of
-      ArchPPC    -> -- SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
-                    ADD sp sp (RIImm (ImmInt (-amount)))
-      ArchPPC_64 _ -> STU II64 sp (AddrRegImm sp (ImmInt (-amount)))
-      arch       -> panic $ "ppc_mkStackAllocInstr " ++ show arch
+  = ppc_mkStackAllocInstr' platform (-amount) -- allocate: sp changes by -amount
 
 ppc_mkStackDeallocInstr :: Platform -> Int -> Instr
 ppc_mkStackDeallocInstr platform amount
+  = ppc_mkStackAllocInstr' platform amount -- deallocate: sp changes by +amount
+
+ppc_mkStackAllocInstr' :: Platform -> Int -> Instr
+ppc_mkStackAllocInstr' platform amount -- amount: signed change applied to sp
   = case platformArch platform of
-      ArchPPC    -> -- ADD II32 (OpImm (ImmInt amount)) (OpReg esp)
-                    ADD sp sp (RIImm (ImmInt amount))
-      ArchPPC_64 _ -> ADD sp sp (RIImm (ImmInt amount))
-      arch       -> panic $ "ppc_mkStackDeallocInstr " ++ show arch
+    ArchPPC      -> UPDATE_SP II32 (ImmInt amount) -- pseudo-instruction, expanded in PPC.Ppr
+    ArchPPC_64 _ -> UPDATE_SP II64 (ImmInt amount) -- same, with 64 bit size
+    _            -> panic $ "ppc_mkStackAllocInstr' "
+                            ++ show (platformArch platform)
 
 --
 -- See note [extra spill slots] in X86/Instr.hs
@@ -187,8 +187,10 @@ data Instr
 
     -- Loads and stores.
     | LD      Size Reg AddrMode     -- Load size, dst, src
+    | LDFAR   Size Reg AddrMode     -- Load size, dst, src (32 bit offset)
     | LA      Size Reg AddrMode     -- Load arithmetic size, dst, src
     | ST      Size Reg AddrMode     -- Store size, src, dst
+    | STFAR   Size Reg AddrMode     -- Store size, src, dst (32 bit offset)
     | STU     Size Reg AddrMode     -- Store with Update size, src, dst
     | LIS     Reg Imm               -- Load Immediate Shifted dst, src
     | LI      Reg Imm               -- Load Immediate dst, src
@@ -278,6 +280,8 @@ data Instr
     | NOP                       -- no operation, PowerPC 64 bit
                                 -- needs this as place holder to
                                 -- reload TOC pointer
+    | UPDATE_SP Size Imm      -- expand/shrink spill area on C stack
+                                -- pseudo-instruction
 
 -- | Get the registers that are being used by this instruction.
 -- regUsage doesn't need to do any trickery for jumps and such.
@@ -289,8 +293,10 @@ ppc_regUsageOfInstr :: Platform -> Instr
 ppc_regUsageOfInstr platform instr
  = case instr of
     LD      _ reg addr       -> usage (regAddr addr, [reg])
+    LDFAR   _ reg addr       -> usage (regAddr addr, [reg])
     LA      _ reg addr       -> usage (regAddr addr, [reg])
     ST      _ reg addr       -> usage (reg : regAddr addr, [])
+    STFAR   _ reg addr       -> usage (reg : regAddr addr, [])
     STU     _ reg addr       -> usage (reg : regAddr addr, [])
     LIS     reg _            -> usage ([], [reg])
     LI      reg _            -> usage ([], [reg])
@@ -350,6 +356,7 @@ ppc_regUsageOfInstr platform instr
     MFLR    reg             -> usage ([], [reg])
     FETCHPC reg             -> usage ([], [reg])
     FETCHTOC reg _          -> usage ([], [reg])
+    UPDATE_SP _ _           -> usage ([], [sp])
     _                       -> noUsage
   where
     usage (src, dst) = RU (filter (interesting platform) src)
@@ -376,8 +383,10 @@ ppc_patchRegsOfInstr :: Instr -> (Reg ->
 ppc_patchRegsOfInstr instr env
  = case instr of
     LD      sz reg addr     -> LD sz (env reg) (fixAddr addr)
+    LDFAR   sz reg addr     -> LDFAR sz (env reg) (fixAddr addr)
     LA      sz reg addr     -> LA sz (env reg) (fixAddr addr)
     ST      sz reg addr     -> ST sz (env reg) (fixAddr addr)
+    STFAR   sz reg addr     -> STFAR sz (env reg) (fixAddr addr)
     STU     sz reg addr     -> STU sz (env reg) (fixAddr addr)
     LIS     reg imm         -> LIS (env reg) imm
     LI      reg imm         -> LI (env reg) imm
@@ -505,7 +514,11 @@ ppc_mkSpillInstr dflags reg delta slot
                                 _       -> II64
                 RcDouble  -> FF64
                 _         -> panic "PPC.Instr.mkSpillInstr: no match"
-    in ST sz reg (AddrRegImm sp (ImmInt (off-delta)))
+        instr = case makeImmediate W32 True (off-delta) of
+                Just _  -> ST
+                Nothing -> STFAR -- pseudo instruction: 32 bit offsets
+
+    in instr sz reg (AddrRegImm sp (ImmInt (off-delta)))
 
 
 ppc_mkLoadInstr
@@ -526,7 +539,11 @@ ppc_mkLoadInstr dflags reg delta slot
                                  _       -> II64
                 RcDouble  -> FF64
                 _         -> panic "PPC.Instr.mkLoadInstr: no match"
-    in LD sz reg (AddrRegImm sp (ImmInt (off-delta)))
+        instr = case makeImmediate W32 True (off-delta) of
+                Just _  -> LD
+                Nothing -> LDFAR -- pseudo instruction: 32 bit offsets
+
+    in instr sz reg (AddrRegImm sp (ImmInt (off-delta)))
 
 
 -- | The maximum number of bytes required to spill a register. PPC32
Index: ghc-7.10.2/compiler/nativeGen/PPC/Ppr.hs
===================================================================
--- ghc-7.10.2.orig/compiler/nativeGen/PPC/Ppr.hs
+++ ghc-7.10.2/compiler/nativeGen/PPC/Ppr.hs
@@ -437,6 +437,14 @@ pprInstr (LD sz reg addr) = hcat [
         ptext (sLit ", "),
         pprAddr addr
     ]
+-- LDFAR pseudo-instruction: load from base + 32 bit offset as an ADDIS/LD pair.
+pprInstr (LDFAR sz dst (AddrRegImm base off)) =
+   sdocWithPlatform $ \platform ->
+      let tmp = tmpReg platform in   -- tmp := base + high part of off
+      vcat [ pprInstr (ADDIS tmp base (HA off))
+           , pprInstr (LD sz dst (AddrRegImm tmp (LO off))) ]
+pprInstr (LDFAR _ _ _) =   -- only register+immediate addresses are handled
+   panic "PPC.Ppr.pprInstr LDFAR: no match"
 pprInstr (LA sz reg addr) = hcat [
         char '\t',
         ptext (sLit "l"),
@@ -467,6 +475,14 @@ pprInstr (ST sz reg addr) = hcat [
         ptext (sLit ", "),
         pprAddr addr
     ]
+-- STFAR pseudo-instruction: store to base + 32 bit offset as an ADDIS/ST pair.
+pprInstr (STFAR sz src (AddrRegImm base off)) =
+   sdocWithPlatform $ \platform ->
+      let tmp = tmpReg platform in   -- tmp := base + high part of off
+      vcat [ pprInstr (ADDIS tmp base (HA off))
+           , pprInstr (ST sz src (AddrRegImm tmp (LO off))) ]
+pprInstr (STFAR _ _ _) =   -- only register+immediate addresses are handled
+   panic "PPC.Ppr.pprInstr STFAR: no match"
 pprInstr (STU sz reg addr) = hcat [
         char '\t',
         ptext (sLit "st"),
@@ -799,6 +815,22 @@ pprInstr LWSYNC = ptext (sLit "\tlwsync"
 
 pprInstr NOP = ptext (sLit "\tnop")
 
+pprInstr (UPDATE_SP sz amount@(ImmInt offset))  -- move sp by a signed amount
+   | fits16Bits offset = vcat [
+       pprInstr (LD sz r0 (AddrRegImm sp (ImmInt 0))),  -- r0 := word at 0(sp)
+       pprInstr (STU sz r0 (AddrRegImm sp amount))      -- store r0 at sp+amount; sp := sp+amount
+     ]
+-- Amount too large for one displacement: build it in tmp, then store indexed.
+pprInstr (UPDATE_SP sz amount)
+   = sdocWithPlatform $ \platform ->
+       let tmp = tmpReg platform in
+         vcat [
+           pprInstr (LD sz r0 (AddrRegImm sp (ImmInt 0))),
+           pprInstr (LIS tmp (HA amount)),              -- tmp := high part only; sp is added by STU
+           pprInstr (ADD tmp tmp (RIImm (LO amount))),  -- tmp := amount
+           pprInstr (STU sz r0 (AddrRegReg sp tmp))     -- address is sp + tmp; sp := sp + amount
+         ]
+
 -- pprInstr _ = panic "pprInstr (ppc)"
 
 
Index: ghc-7.10.2/compiler/nativeGen/PPC/Regs.hs
===================================================================
--- ghc-7.10.2.orig/compiler/nativeGen/PPC/Regs.hs
+++ ghc-7.10.2/compiler/nativeGen/PPC/Regs.hs
@@ -37,7 +37,8 @@ module PPC.Regs (
         fits16Bits,
         makeImmediate,
         fReg,
-        sp, toc, r3, r4, r11, r12, r27, r28, r30,
+        r0, sp, toc, r3, r4, r11, r12, r27, r28, r30,
+        tmpReg,
         f1, f20, f21,
 
         allocatableRegs
@@ -306,7 +307,8 @@ point registers.
 fReg :: Int -> RegNo
 fReg x = (32 + x)
 
-sp, toc, r3, r4, r11, r12, r27, r28, r30, f1, f20, f21 :: Reg
+r0, sp, toc, r3, r4, r11, r12, r27, r28, r30, f1, f20, f21 :: Reg
+r0      = regSingle 0
 sp      = regSingle 1
 toc     = regSingle 2
 r3      = regSingle 3
@@ -327,3 +329,11 @@ allocatableRegs :: Platform -> [RealReg]
 allocatableRegs platform
    = let isFree i = isFastTrue (freeReg platform i)
      in  map RealRegSingle $ filter isFree allMachRegNos
+
+-- | Scratch register for compiler-generated sequences: r13 on 32 bit, r30 on 64 bit.
+tmpReg :: Platform -> Reg
+tmpReg platform =
+       case platformArch platform of
+       ArchPPC      -> regSingle 13
+       ArchPPC_64 _ -> regSingle 30
+       _            -> panic "PPC.Regs.tmpReg: unknown arch"
Index: ghc-7.10.2/includes/CodeGen.Platform.hs
===================================================================
--- ghc-7.10.2.orig/includes/CodeGen.Platform.hs
+++ ghc-7.10.2/includes/CodeGen.Platform.hs
@@ -876,16 +876,26 @@ freeRegBase _ = fastBool True
 #elif MACHREGS_powerpc
 
 freeReg 0 = fastBool False -- Hack: r0 can't be used in all insns,
-                           -- but it's actually free
+                           -- and the stack reallocation code on Linux uses
+                           -- it as a scratch register, so it stays reserved
+                           -- on every platform, Darwin included.
 freeReg 1 = fastBool False -- The Stack Pointer
 # if !MACHREGS_darwin
 -- most non-darwin powerpc OSes use r2 as a TOC pointer or something like that
 freeReg 2 = fastBool False
 -- TODO: make this conditonal for ppc64 ELF
-freeReg 13 = fastBool False -- reserved for system thread ID
+freeReg 13 = fastBool False -- reserved for system thread ID on 64 bit
 -- TODO: do not reserve r30 in ppc64 ELF
 -- at least linux in -fPIC relies on r30 in PLT stubs
 freeReg 30 = fastBool False
+{- TODO: reserve r13 only on 64 bit systems and r30 only on 32 bit systems.
+   For now r30 (on 64 bit) and r13 (on 32 bit) serve as the temporary
+   register in stack handling code. See compiler/nativeGen/PPC/Ppr.hs.
+
+   Later we might want to reserve r13 and r30 only where it is required,
+   and then use r12 as temporary register, which is also what the C ABI does.
+-}
+
 # endif
 # ifdef REG_Base
 freeReg REG_Base = fastBool False
openSUSE Build Service is sponsored by