llvm: 11.0.0 -> 11.0.1
This commit is contained in:
parent
816f990dda
commit
6655119ad1
@ -1731,7 +1731,7 @@ drwxr-xr-x root/root usr/lib/cmake/llvm/
|
||||
-rw-r--r-- root/root usr/lib/cmake/llvm/TensorFlowCompile.cmake
|
||||
-rw-r--r-- root/root usr/lib/cmake/llvm/UseLibtool.cmake
|
||||
-rw-r--r-- root/root usr/lib/cmake/llvm/VersionFromVCS.cmake
|
||||
lrwxrwxrwx root/root usr/lib/libLLVM-11.0.0.so -> libLLVM-11.so
|
||||
lrwxrwxrwx root/root usr/lib/libLLVM-11.0.1.so -> libLLVM-11.so
|
||||
-rwxr-xr-x root/root usr/lib/libLLVM-11.so
|
||||
lrwxrwxrwx root/root usr/lib/libLLVM.so -> libLLVM-11.so
|
||||
-rw-r--r-- root/root usr/lib/libLLVMAMDGPUAsmParser.a
|
||||
|
@ -1,7 +1,6 @@
|
||||
untrusted comment: verify with /etc/ports/opt.pub
|
||||
RWSE3ohX2g5d/T1pDeKt7SZ3Ca0bLtXj4Tw/ipxuoE+0K0hmol5e1jHEx3YG2PkR/iNKEHuIfDQsCIZBIfMx066+DLdPPQiJlA0=
|
||||
SHA256 (Pkgfile) = 055aff550f54ee796df76b7de3eac14c9035589ceff3e0ac8b97bbcbb8b4ce12
|
||||
SHA256 (.footprint) = 6844824bee1d49462e6594ae8f9f505d094c6a78b0d4d5a10df12d18413bdb46
|
||||
SHA256 (llvm-11.0.0.src.tar.xz) = 913f68c898dfb4a03b397c5e11c6a2f39d0f22ed7665c9cefa87a34423a72469
|
||||
RWSE3ohX2g5d/UURZHeBK6f46oT8MQ54w6W2Cm3hfwInDpYGxtIeF9Kfa2Jp+MZsBF4rwALnlsJuTHPVPfmfRzR/dVx+sgsDhA0=
|
||||
SHA256 (Pkgfile) = 60bc8805c7c140e189df6a583963b217dce6c204a303385819a83c9a6e34ae2e
|
||||
SHA256 (.footprint) = aa77e9f33edbb3babbce3ec9d8d387544edced3fc7371c218109fdb9fa730678
|
||||
SHA256 (llvm-11.0.1.src.tar.xz) = ccd87c254b6aebc5077e4e6977d08d4be888e7eb672c6630a26a15d58b59b528
|
||||
SHA256 (llvm-config.h) = 2227b83b904348e5530f3475c978e11cb0f05a85024781dcb49ac86b65582707
|
||||
SHA256 (stack-clash-fixes.patch) = bdcaa7559223bd42a381086f7cc23fc73f88ebb1966a7c235f897db0f73b7d20
|
||||
|
10
llvm/Pkgfile
10
llvm/Pkgfile
@ -4,19 +4,15 @@
|
||||
# Depends on: cmake libxml2 ninja
|
||||
|
||||
name=llvm
|
||||
version=11.0.0
|
||||
release=2
|
||||
version=11.0.1
|
||||
release=1
|
||||
source=(https://github.com/llvm/llvm-project/releases/download/llvmorg-$version/llvm-$version.src.tar.xz
|
||||
llvm-config.h
|
||||
stack-clash-fixes.patch)
|
||||
llvm-config.h)
|
||||
|
||||
build() {
|
||||
export CC=gcc
|
||||
export CXX=g++
|
||||
|
||||
# https://bugs.llvm.org/show_bug.cgi?id=48007
|
||||
patch -d $name-$version.src -p2 -i $SRC/stack-clash-fixes.patch
|
||||
|
||||
cmake -S $name-$version.src -B build -G Ninja \
|
||||
-D CMAKE_INSTALL_PREFIX=/usr \
|
||||
-D CMAKE_BUILD_TYPE=Release \
|
||||
|
@ -1,870 +0,0 @@
|
||||
From a1e0363c7402f7aa58e24e0e6dfa447ebabc1910 Mon Sep 17 00:00:00 2001
|
||||
From: serge-sans-paille <sguelton@redhat.com>
|
||||
Date: Wed, 30 Sep 2020 11:35:00 +0200
|
||||
Subject: [PATCH 1/3] Fix limit behavior of dynamic alloca
|
||||
|
||||
When the allocation size is 0, we shouldn't probe. Within [1, PAGE_SIZE], we
|
||||
should probe once etc.
|
||||
|
||||
This fixes https://bugs.llvm.org/show_bug.cgi?id=47657
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D88548
|
||||
|
||||
(cherry picked from commit 9573c9f2a363da71b2c07a3add4e52721e6028a0)
|
||||
---
|
||||
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
|
||||
llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll | 8 ++++----
|
||||
2 files changed, 5 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
index fd1e6517dfac..f68ae4461fe3 100644
|
||||
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
@@ -31876,7 +31876,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
|
||||
|
||||
BuildMI(testMBB, DL, TII->get(X86::JCC_1))
|
||||
.addMBB(tailMBB)
|
||||
- .addImm(X86::COND_L);
|
||||
+ .addImm(X86::COND_LE);
|
||||
testMBB->addSuccessor(blockMBB);
|
||||
testMBB->addSuccessor(tailMBB);
|
||||
|
||||
diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
|
||||
index bc4678564083..82fd67842c8a 100644
|
||||
--- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
|
||||
+++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
|
||||
@@ -24,12 +24,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
|
||||
; CHECK-X86-64-NEXT: andq $-16, %rcx
|
||||
; CHECK-X86-64-NEXT: subq %rcx, %rax
|
||||
; CHECK-X86-64-NEXT: cmpq %rsp, %rax
|
||||
-; CHECK-X86-64-NEXT: jl .LBB0_3
|
||||
+; CHECK-X86-64-NEXT: jle .LBB0_3
|
||||
; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-X86-64-NEXT: movq $0, (%rsp)
|
||||
; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
; CHECK-X86-64-NEXT: cmpq %rsp, %rax
|
||||
-; CHECK-X86-64-NEXT: jge .LBB0_2
|
||||
+; CHECK-X86-64-NEXT: jg .LBB0_2
|
||||
; CHECK-X86-64-NEXT: .LBB0_3:
|
||||
; CHECK-X86-64-NEXT: movq %rax, %rsp
|
||||
; CHECK-X86-64-NEXT: movl $1, 4792(%rax)
|
||||
@@ -54,12 +54,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
|
||||
; CHECK-X86-32-NEXT: andl $-16, %ecx
|
||||
; CHECK-X86-32-NEXT: subl %ecx, %eax
|
||||
; CHECK-X86-32-NEXT: cmpl %esp, %eax
|
||||
-; CHECK-X86-32-NEXT: jl .LBB0_3
|
||||
+; CHECK-X86-32-NEXT: jle .LBB0_3
|
||||
; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-X86-32-NEXT: movl $0, (%esp)
|
||||
; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
|
||||
; CHECK-X86-32-NEXT: cmpl %esp, %eax
|
||||
-; CHECK-X86-32-NEXT: jge .LBB0_2
|
||||
+; CHECK-X86-32-NEXT: jg .LBB0_2
|
||||
; CHECK-X86-32-NEXT: .LBB0_3:
|
||||
; CHECK-X86-32-NEXT: movl %eax, %esp
|
||||
; CHECK-X86-32-NEXT: movl $1, 4792(%eax)
|
||||
|
||||
From aac36687f7978f33751daf2870b5c812124ebfaf Mon Sep 17 00:00:00 2001
|
||||
From: serge-sans-paille <sguelton@redhat.com>
|
||||
Date: Thu, 23 Jul 2020 16:22:48 +0200
|
||||
Subject: [PATCH 2/3] Fix interaction between stack alignment and inline-asm
|
||||
stack clash protection
|
||||
|
||||
As reported in https://github.com/rust-lang/rust/issues/70143 alignment is not
|
||||
taken into account when doing the probing. Fix that by adjusting the first probe
|
||||
if the stack align is small, or by extending the dynamic probing if the
|
||||
alignment is large.
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D84419
|
||||
|
||||
(cherry picked from commit f2c6bfa350de142e4d63808d03335f69bd136d6a)
|
||||
---
|
||||
llvm/lib/Target/X86/X86FrameLowering.cpp | 222 ++++++++++++++++--
|
||||
llvm/lib/Target/X86/X86FrameLowering.h | 8 +-
|
||||
.../X86/stack-clash-large-large-align.ll | 88 +++++++
|
||||
.../CodeGen/X86/stack-clash-no-free-probe.ll | 27 ---
|
||||
.../stack-clash-small-alloc-medium-align.ll | 135 +++++++++++
|
||||
.../X86/stack-clash-small-large-align.ll | 83 +++++++
|
||||
6 files changed, 512 insertions(+), 51 deletions(-)
|
||||
create mode 100644 llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
|
||||
delete mode 100644 llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
|
||||
create mode 100644 llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
|
||||
create mode 100644 llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
|
||||
|
||||
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
|
||||
index c7ca6fb2a4fc..db6b68659493 100644
|
||||
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
|
||||
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
|
||||
@@ -586,29 +586,55 @@ void X86FrameLowering::emitStackProbeInlineGeneric(
|
||||
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
|
||||
uint64_t ProbeChunk = StackProbeSize * 8;
|
||||
|
||||
+ uint64_t MaxAlign =
|
||||
+ TRI->needsStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
|
||||
+
|
||||
// Synthesize a loop or unroll it, depending on the number of iterations.
|
||||
+ // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
|
||||
+ // between the unaligned rsp and current rsp.
|
||||
if (Offset > ProbeChunk) {
|
||||
- emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset);
|
||||
+ emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
|
||||
+ MaxAlign % StackProbeSize);
|
||||
} else {
|
||||
- emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset);
|
||||
+ emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
|
||||
+ MaxAlign % StackProbeSize);
|
||||
}
|
||||
}
|
||||
|
||||
void X86FrameLowering::emitStackProbeInlineGenericBlock(
|
||||
MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
|
||||
- uint64_t Offset) const {
|
||||
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
|
||||
+ uint64_t AlignOffset) const {
|
||||
|
||||
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
|
||||
const X86TargetLowering &TLI = *STI.getTargetLowering();
|
||||
const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
|
||||
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
|
||||
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
|
||||
+
|
||||
uint64_t CurrentOffset = 0;
|
||||
- // 0 Thanks to return address being saved on the stack
|
||||
- uint64_t CurrentProbeOffset = 0;
|
||||
|
||||
- // For the first N - 1 pages, just probe. I tried to take advantage of
|
||||
+ assert(AlignOffset < StackProbeSize);
|
||||
+
|
||||
+ // If the offset is so small it fits within a page, there's nothing to do.
|
||||
+ if (StackProbeSize < Offset + AlignOffset) {
|
||||
+
|
||||
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
|
||||
+ .addReg(StackPtr)
|
||||
+ .addImm(StackProbeSize - AlignOffset)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
|
||||
+
|
||||
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
|
||||
+ .setMIFlag(MachineInstr::FrameSetup),
|
||||
+ StackPtr, false, 0)
|
||||
+ .addImm(0)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+ NumFrameExtraProbe++;
|
||||
+ CurrentOffset = StackProbeSize - AlignOffset;
|
||||
+ }
|
||||
+
|
||||
+ // For the next N - 1 pages, just probe. I tried to take advantage of
|
||||
// natural probes but it implies much more logic and there was very few
|
||||
// interesting natural probes to interleave.
|
||||
while (CurrentOffset + StackProbeSize < Offset) {
|
||||
@@ -626,9 +652,9 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
NumFrameExtraProbe++;
|
||||
CurrentOffset += StackProbeSize;
|
||||
- CurrentProbeOffset += StackProbeSize;
|
||||
}
|
||||
|
||||
+ // No need to probe the tail, it is smaller than a Page.
|
||||
uint64_t ChunkSize = Offset - CurrentOffset;
|
||||
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
|
||||
.addReg(StackPtr)
|
||||
@@ -639,8 +665,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
|
||||
|
||||
void X86FrameLowering::emitStackProbeInlineGenericLoop(
|
||||
MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
|
||||
- uint64_t Offset) const {
|
||||
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
|
||||
+ uint64_t AlignOffset) const {
|
||||
assert(Offset && "null offset");
|
||||
|
||||
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
|
||||
@@ -648,6 +674,26 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
|
||||
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
|
||||
const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
|
||||
|
||||
+ if (AlignOffset) {
|
||||
+ if (AlignOffset < StackProbeSize) {
|
||||
+ // Perform a first smaller allocation followed by a probe.
|
||||
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset);
|
||||
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr)
|
||||
+ .addReg(StackPtr)
|
||||
+ .addImm(AlignOffset)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
|
||||
+
|
||||
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
|
||||
+ .setMIFlag(MachineInstr::FrameSetup),
|
||||
+ StackPtr, false, 0)
|
||||
+ .addImm(0)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+ NumFrameExtraProbe++;
|
||||
+ Offset -= AlignOffset;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
// Synthesize a loop
|
||||
NumFrameLoopProbe++;
|
||||
const BasicBlock *LLVM_BB = MBB.getBasicBlock();
|
||||
@@ -666,8 +712,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
|
||||
|
||||
// save loop bound
|
||||
{
|
||||
- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
|
||||
- BuildMI(MBB, MBBI, DL, TII.get(Opc), FinalStackProbed)
|
||||
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset);
|
||||
+ BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
|
||||
.addReg(FinalStackProbed)
|
||||
.addImm(Offset / StackProbeSize * StackProbeSize)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
@@ -675,8 +721,8 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
|
||||
|
||||
// allocate a page
|
||||
{
|
||||
- const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
|
||||
- BuildMI(testMBB, DL, TII.get(Opc), StackPtr)
|
||||
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
|
||||
+ BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr)
|
||||
.addReg(StackPtr)
|
||||
.addImm(StackProbeSize)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
@@ -1052,13 +1098,149 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
|
||||
uint64_t MaxAlign) const {
|
||||
uint64_t Val = -MaxAlign;
|
||||
unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
|
||||
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
|
||||
- .addReg(Reg)
|
||||
- .addImm(Val)
|
||||
- .setMIFlag(MachineInstr::FrameSetup);
|
||||
|
||||
- // The EFLAGS implicit def is dead.
|
||||
- MI->getOperand(3).setIsDead();
|
||||
+ MachineFunction &MF = *MBB.getParent();
|
||||
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
|
||||
+ const X86TargetLowering &TLI = *STI.getTargetLowering();
|
||||
+ const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
|
||||
+ const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
|
||||
+
|
||||
+ // We want to make sure that (in worst case) less than StackProbeSize bytes
|
||||
+ // are not probed after the AND. This assumption is used in
|
||||
+ // emitStackProbeInlineGeneric.
|
||||
+ if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
|
||||
+ {
|
||||
+ NumFrameLoopProbe++;
|
||||
+ MachineBasicBlock *entryMBB =
|
||||
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
|
||||
+ MachineBasicBlock *headMBB =
|
||||
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
|
||||
+ MachineBasicBlock *bodyMBB =
|
||||
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
|
||||
+ MachineBasicBlock *footMBB =
|
||||
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
|
||||
+
|
||||
+ MachineFunction::iterator MBBIter = MBB.getIterator();
|
||||
+ MF.insert(MBBIter, entryMBB);
|
||||
+ MF.insert(MBBIter, headMBB);
|
||||
+ MF.insert(MBBIter, bodyMBB);
|
||||
+ MF.insert(MBBIter, footMBB);
|
||||
+ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
|
||||
+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
|
||||
+
|
||||
+ // Setup entry block
|
||||
+ {
|
||||
+
|
||||
+ entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
|
||||
+ BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
|
||||
+ .addReg(StackPtr)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+ MachineInstr *MI =
|
||||
+ BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
|
||||
+ .addReg(FinalStackProbed)
|
||||
+ .addImm(Val)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+
|
||||
+ // The EFLAGS implicit def is dead.
|
||||
+ MI->getOperand(3).setIsDead();
|
||||
+
|
||||
+ BuildMI(entryMBB, DL,
|
||||
+ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
|
||||
+ .addReg(FinalStackProbed)
|
||||
+ .addReg(StackPtr)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+ BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
|
||||
+ .addMBB(&MBB)
|
||||
+ .addImm(X86::COND_E)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+ entryMBB->addSuccessor(headMBB);
|
||||
+ entryMBB->addSuccessor(&MBB);
|
||||
+ }
|
||||
+
|
||||
+ // Loop entry block
|
||||
+
|
||||
+ {
|
||||
+ const unsigned SUBOpc =
|
||||
+ getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
|
||||
+ BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
|
||||
+ .addReg(StackPtr)
|
||||
+ .addImm(StackProbeSize)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+
|
||||
+ BuildMI(headMBB, DL,
|
||||
+ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
|
||||
+ .addReg(FinalStackProbed)
|
||||
+ .addReg(StackPtr)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+
|
||||
+ // jump
|
||||
+ BuildMI(headMBB, DL, TII.get(X86::JCC_1))
|
||||
+ .addMBB(footMBB)
|
||||
+ .addImm(X86::COND_B)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+
|
||||
+ headMBB->addSuccessor(bodyMBB);
|
||||
+ headMBB->addSuccessor(footMBB);
|
||||
+ }
|
||||
+
|
||||
+ // setup loop body
|
||||
+ {
|
||||
+ addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
|
||||
+ .setMIFlag(MachineInstr::FrameSetup),
|
||||
+ StackPtr, false, 0)
|
||||
+ .addImm(0)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+
|
||||
+ const unsigned SUBOpc =
|
||||
+ getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
|
||||
+ BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
|
||||
+ .addReg(StackPtr)
|
||||
+ .addImm(StackProbeSize)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+
|
||||
+ // cmp with stack pointer bound
|
||||
+ BuildMI(bodyMBB, DL,
|
||||
+ TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
|
||||
+ .addReg(FinalStackProbed)
|
||||
+ .addReg(StackPtr)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+
|
||||
+ // jump
|
||||
+ BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
|
||||
+ .addMBB(bodyMBB)
|
||||
+ .addImm(X86::COND_B)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+ bodyMBB->addSuccessor(bodyMBB);
|
||||
+ bodyMBB->addSuccessor(footMBB);
|
||||
+ }
|
||||
+
|
||||
+ // setup loop footer
|
||||
+ {
|
||||
+ BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
|
||||
+ .addReg(FinalStackProbed)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+ addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
|
||||
+ .setMIFlag(MachineInstr::FrameSetup),
|
||||
+ StackPtr, false, 0)
|
||||
+ .addImm(0)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+ footMBB->addSuccessor(&MBB);
|
||||
+ }
|
||||
+
|
||||
+ recomputeLiveIns(*headMBB);
|
||||
+ recomputeLiveIns(*bodyMBB);
|
||||
+ recomputeLiveIns(*footMBB);
|
||||
+ recomputeLiveIns(MBB);
|
||||
+ }
|
||||
+ } else {
|
||||
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
|
||||
+ .addReg(Reg)
|
||||
+ .addImm(Val)
|
||||
+ .setMIFlag(MachineInstr::FrameSetup);
|
||||
+
|
||||
+ // The EFLAGS implicit def is dead.
|
||||
+ MI->getOperand(3).setIsDead();
|
||||
+ }
|
||||
}
|
||||
|
||||
bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
|
||||
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
|
||||
index c0b4be95f88d..bb2e83205e71 100644
|
||||
--- a/llvm/lib/Target/X86/X86FrameLowering.h
|
||||
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
|
||||
@@ -213,14 +213,14 @@ private:
|
||||
void emitStackProbeInlineGenericBlock(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
- const DebugLoc &DL,
|
||||
- uint64_t Offset) const;
|
||||
+ const DebugLoc &DL, uint64_t Offset,
|
||||
+ uint64_t Align) const;
|
||||
|
||||
void emitStackProbeInlineGenericLoop(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
- const DebugLoc &DL,
|
||||
- uint64_t Offset) const;
|
||||
+ const DebugLoc &DL, uint64_t Offset,
|
||||
+ uint64_t Align) const;
|
||||
|
||||
/// Emit a stub to later inline the target stack probe.
|
||||
MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
|
||||
diff --git a/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
|
||||
new file mode 100644
|
||||
index 000000000000..6c981cb4ac91
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/X86/stack-clash-large-large-align.ll
|
||||
@@ -0,0 +1,88 @@
|
||||
+; RUN: llc < %s | FileCheck %s
|
||||
+
|
||||
+
|
||||
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
+target triple = "x86_64-unknown-linux-gnu"
|
||||
+
|
||||
+define i32 @foo_noprotect() local_unnamed_addr {
|
||||
+; CHECK-LABEL: foo_noprotect:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: pushq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
+; CHECK-NEXT: .cfi_offset %rbp, -16
|
||||
+; CHECK-NEXT: movq %rsp, %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
||||
+; CHECK-NEXT: andq $-4096, %rsp # imm = 0xF000
|
||||
+; CHECK-NEXT: subq $73728, %rsp # imm = 0x12000
|
||||
+; CHECK-NEXT: movl $1, 392(%rsp)
|
||||
+; CHECK-NEXT: movl $1, 28792(%rsp)
|
||||
+; CHECK-NEXT: movl (%rsp), %eax
|
||||
+; CHECK-NEXT: movq %rbp, %rsp
|
||||
+; CHECK-NEXT: popq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
|
||||
+; CHECK-NEXT: retq
|
||||
+
|
||||
+
|
||||
+ %a = alloca i32, i64 18000, align 4096
|
||||
+ %b0 = getelementptr inbounds i32, i32* %a, i64 98
|
||||
+ %b1 = getelementptr inbounds i32, i32* %a, i64 7198
|
||||
+ store volatile i32 1, i32* %b0
|
||||
+ store volatile i32 1, i32* %b1
|
||||
+ %c = load volatile i32, i32* %a
|
||||
+ ret i32 %c
|
||||
+}
|
||||
+
|
||||
+define i32 @foo_protect() local_unnamed_addr #0 {
|
||||
+; CHECK-LABEL: foo_protect:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: pushq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
+; CHECK-NEXT: .cfi_offset %rbp, -16
|
||||
+; CHECK-NEXT: movq %rsp, %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
||||
+; CHECK-NEXT: movq %rsp, %r11
|
||||
+; CHECK-NEXT: andq $-4096, %r11 # imm = 0xF000
|
||||
+; CHECK-NEXT: cmpq %rsp, %r11
|
||||
+; CHECK-NEXT: je .LBB1_4
|
||||
+; CHECK-NEXT:# %bb.1:
|
||||
+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
+; CHECK-NEXT: cmpq %rsp, %r11
|
||||
+; CHECK-NEXT: jb .LBB1_3
|
||||
+; CHECK-NEXT:.LBB1_2: # =>This Inner Loop Header: Depth=1
|
||||
+; CHECK-NEXT: movq $0, (%rsp)
|
||||
+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
+; CHECK-NEXT: cmpq %rsp, %r11
|
||||
+; CHECK-NEXT: jb .LBB1_2
|
||||
+; CHECK-NEXT:.LBB1_3:
|
||||
+; CHECK-NEXT: movq %r11, %rsp
|
||||
+; CHECK-NEXT: movq $0, (%rsp)
|
||||
+; CHECK-NEXT:.LBB1_4:
|
||||
+; CHECK-NEXT: movq %rsp, %r11
|
||||
+; CHECK-NEXT: subq $73728, %r11 # imm = 0x12000
|
||||
+; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1
|
||||
+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
+; CHECK-NEXT: movq $0, (%rsp)
|
||||
+; CHECK-NEXT: cmpq %r11, %rsp
|
||||
+; CHECK-NEXT: jne .LBB1_5
|
||||
+; CHECK-NEXT:# %bb.6:
|
||||
+; CHECK-NEXT: movl $1, 392(%rsp)
|
||||
+; CHECK-NEXT: movl $1, 28792(%rsp)
|
||||
+; CHECK-NEXT: movl (%rsp), %eax
|
||||
+; CHECK-NEXT: movq %rbp, %rsp
|
||||
+; CHECK-NEXT: popq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
|
||||
+; CHECK-NEXT: retq
|
||||
+
|
||||
+
|
||||
+
|
||||
+
|
||||
+ %a = alloca i32, i64 18000, align 4096
|
||||
+ %b0 = getelementptr inbounds i32, i32* %a, i64 98
|
||||
+ %b1 = getelementptr inbounds i32, i32* %a, i64 7198
|
||||
+ store volatile i32 1, i32* %b0
|
||||
+ store volatile i32 1, i32* %b1
|
||||
+ %c = load volatile i32, i32* %a
|
||||
+ ret i32 %c
|
||||
+}
|
||||
+
|
||||
+attributes #0 = {"probe-stack"="inline-asm"}
|
||||
diff --git a/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll b/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
|
||||
deleted file mode 100644
|
||||
index 652acbdf00ba..000000000000
|
||||
--- a/llvm/test/CodeGen/X86/stack-clash-no-free-probe.ll
|
||||
+++ /dev/null
|
||||
@@ -1,27 +0,0 @@
|
||||
-; RUN: llc < %s | FileCheck %s
|
||||
-
|
||||
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
-target triple = "x86_64-unknown-linux-gnu"
|
||||
-
|
||||
-define i32 @foo(i64 %i) local_unnamed_addr #0 {
|
||||
-; CHECK-LABEL: foo:
|
||||
-; CHECK: # %bb.0:
|
||||
-; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
-; CHECK-NEXT: movq $0, (%rsp)
|
||||
-; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8
|
||||
-; CHECK-NEXT: .cfi_def_cfa_offset 7888
|
||||
-; CHECK-NEXT: movl $1, -128(%rsp,%rdi,4)
|
||||
-; CHECK-NEXT: movl -128(%rsp), %eax
|
||||
-; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8
|
||||
-; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
-; CHECK-NEXT: retq
|
||||
-
|
||||
- %a = alloca i32, i32 2000, align 16
|
||||
- %b = getelementptr inbounds i32, i32* %a, i64 %i
|
||||
- store volatile i32 1, i32* %b
|
||||
- %c = load volatile i32, i32* %a
|
||||
- ret i32 %c
|
||||
-}
|
||||
-
|
||||
-attributes #0 = {"probe-stack"="inline-asm"}
|
||||
-
|
||||
diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
|
||||
new file mode 100644
|
||||
index 000000000000..eafa86f1eba9
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
|
||||
@@ -0,0 +1,135 @@
|
||||
+; RUN: llc < %s | FileCheck %s
|
||||
+
|
||||
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
+target triple = "x86_64-unknown-linux-gnu"
|
||||
+
|
||||
+; | case1 | alloca + align < probe_size
|
||||
+define i32 @foo1(i64 %i) local_unnamed_addr #0 {
|
||||
+; CHECK-LABEL: foo1:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: pushq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
+; CHECK-NEXT: .cfi_offset %rbp, -16
|
||||
+; CHECK-NEXT: movq %rsp, %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
||||
+; CHECK-NEXT: andq $-64, %rsp
|
||||
+; CHECK-NEXT: subq $832, %rsp # imm = 0x340
|
||||
+; CHECK-NEXT: movl $1, (%rsp,%rdi,4)
|
||||
+; CHECK-NEXT: movl (%rsp), %eax
|
||||
+; CHECK-NEXT: movq %rbp, %rsp
|
||||
+; CHECK-NEXT: popq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
|
||||
+; CHECK-NEXT: retq
|
||||
+
|
||||
+ %a = alloca i32, i32 200, align 64
|
||||
+ %b = getelementptr inbounds i32, i32* %a, i64 %i
|
||||
+ store volatile i32 1, i32* %b
|
||||
+ %c = load volatile i32, i32* %a
|
||||
+ ret i32 %c
|
||||
+}
|
||||
+
|
||||
+; | case2 | alloca > probe_size, align > probe_size
|
||||
+define i32 @foo2(i64 %i) local_unnamed_addr #0 {
|
||||
+; CHECK-LABEL: foo2:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: pushq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
+; CHECK-NEXT: .cfi_offset %rbp, -16
|
||||
+; CHECK-NEXT: movq %rsp, %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
||||
+; CHECK-NEXT: andq $-2048, %rsp # imm = 0xF800
|
||||
+; CHECK-NEXT: subq $2048, %rsp # imm = 0x800
|
||||
+; CHECK-NEXT: movq $0, (%rsp)
|
||||
+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
+; CHECK-NEXT: movq $0, (%rsp)
|
||||
+; CHECK-NEXT: subq $2048, %rsp # imm = 0x800
|
||||
+; CHECK-NEXT: movl $1, (%rsp,%rdi,4)
|
||||
+; CHECK-NEXT: movl (%rsp), %eax
|
||||
+; CHECK-NEXT: movq %rbp, %rsp
|
||||
+; CHECK-NEXT: popq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
|
||||
+; CHECK-NEXT: retq
|
||||
+
|
||||
+ %a = alloca i32, i32 2000, align 2048
|
||||
+ %b = getelementptr inbounds i32, i32* %a, i64 %i
|
||||
+ store volatile i32 1, i32* %b
|
||||
+ %c = load volatile i32, i32* %a
|
||||
+ ret i32 %c
|
||||
+}
|
||||
+
|
||||
+; | case3 | alloca < probe_size, align < probe_size, alloca + align > probe_size
|
||||
+define i32 @foo3(i64 %i) local_unnamed_addr #0 {
|
||||
+; CHECK-LABEL: foo3:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: pushq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
+; CHECK-NEXT: .cfi_offset %rbp, -16
|
||||
+; CHECK-NEXT: movq %rsp, %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
||||
+; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00
|
||||
+; CHECK-NEXT: subq $3072, %rsp # imm = 0xC00
|
||||
+; CHECK-NEXT: movq $0, (%rsp)
|
||||
+; CHECK-NEXT: subq $1024, %rsp # imm = 0x400
|
||||
+; CHECK-NEXT: movl $1, (%rsp,%rdi,4)
|
||||
+; CHECK-NEXT: movl (%rsp), %eax
|
||||
+; CHECK-NEXT: movq %rbp, %rsp
|
||||
+; CHECK-NEXT: popq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
|
||||
+; CHECK-NEXT: retq
|
||||
+
|
||||
+
|
||||
+ %a = alloca i32, i32 1000, align 1024
|
||||
+ %b = getelementptr inbounds i32, i32* %a, i64 %i
|
||||
+ store volatile i32 1, i32* %b
|
||||
+ %c = load volatile i32, i32* %a
|
||||
+ ret i32 %c
|
||||
+}
|
||||
+
|
||||
+; | case4 | alloca + probe_size < probe_size, followed by dynamic alloca
|
||||
+define i32 @foo4(i64 %i) local_unnamed_addr #0 {
|
||||
+; CHECK-LABEL: foo4:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: pushq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
+; CHECK-NEXT: .cfi_offset %rbp, -16
|
||||
+; CHECK-NEXT: movq %rsp, %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
||||
+; CHECK-NEXT: pushq %rbx
|
||||
+; CHECK-NEXT: andq $-64, %rsp
|
||||
+; CHECK-NEXT: subq $896, %rsp # imm = 0x380
|
||||
+; CHECK-NEXT: movq %rsp, %rbx
|
||||
+; CHECK-NEXT: .cfi_offset %rbx, -24
|
||||
+; CHECK-NEXT: movl $1, (%rbx,%rdi,4)
|
||||
+; CHECK-NEXT: movl (%rbx), %ecx
|
||||
+; CHECK-NEXT: movq %rsp, %rax
|
||||
+; CHECK-NEXT: leaq 15(,%rcx,4), %rcx
|
||||
+; CHECK-NEXT: andq $-16, %rcx
|
||||
+; CHECK-NEXT: subq %rcx, %rax
|
||||
+; CHECK-NEXT: cmpq %rsp, %rax
|
||||
+; CHECK-NEXT: jle .LBB3_3
|
||||
+; CHECK-NEXT:.LBB3_2: # =>This Inner Loop Header: Depth=1
|
||||
+; CHECK-NEXT: movq $0, (%rsp)
|
||||
+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
+; CHECK-NEXT: cmpq %rsp, %rax
|
||||
+; CHECK-NEXT: jg .LBB3_2
|
||||
+; CHECK-NEXT:.LBB3_3:
|
||||
+; CHECK-NEXT: andq $-64, %rax
|
||||
+; CHECK-NEXT: movq %rax, %rsp
|
||||
+; CHECK-NEXT: movl (%rax), %eax
|
||||
+; CHECK-NEXT: leaq -8(%rbp), %rsp
|
||||
+; CHECK-NEXT: popq %rbx
|
||||
+; CHECK-NEXT: popq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
|
||||
+; CHECK-NEXT: retq
|
||||
+
|
||||
+ %a = alloca i32, i32 200, align 64
|
||||
+ %b = getelementptr inbounds i32, i32* %a, i64 %i
|
||||
+ store volatile i32 1, i32* %b
|
||||
+ %c = load volatile i32, i32* %a
|
||||
+ %d = alloca i32, i32 %c, align 64
|
||||
+ %e = load volatile i32, i32* %d
|
||||
+ ret i32 %e
|
||||
+}
|
||||
+
|
||||
+attributes #0 = {"probe-stack"="inline-asm"}
|
||||
+
|
||||
diff --git a/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
|
||||
new file mode 100644
|
||||
index 000000000000..e608bab90415
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/X86/stack-clash-small-large-align.ll
|
||||
@@ -0,0 +1,83 @@
|
||||
+; RUN: llc < %s | FileCheck %s
|
||||
+
|
||||
+
|
||||
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
+target triple = "x86_64-unknown-linux-gnu"
|
||||
+
|
||||
+define i32 @foo_noprotect() local_unnamed_addr {
|
||||
+; CHECK-LABEL: foo_noprotect:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: pushq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
+; CHECK-NEXT: .cfi_offset %rbp, -16
|
||||
+; CHECK-NEXT: movq %rsp, %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
||||
+; CHECK-NEXT: andq $-65536, %rsp
|
||||
+; CHECK-NEXT: subq $65536, %rsp
|
||||
+; CHECK-NEXT: movl $1, 392(%rsp)
|
||||
+; CHECK-NEXT: movl (%rsp), %eax
|
||||
+; CHECK-NEXT: movq %rbp, %rsp
|
||||
+; CHECK-NEXT: popq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
|
||||
+; CHECK-NEXT: retq
|
||||
+
|
||||
+
|
||||
+
|
||||
+ %a = alloca i32, i64 100, align 65536
|
||||
+ %b = getelementptr inbounds i32, i32* %a, i64 98
|
||||
+ store volatile i32 1, i32* %b
|
||||
+ %c = load volatile i32, i32* %a
|
||||
+ ret i32 %c
|
||||
+}
|
||||
+
|
||||
+define i32 @foo_protect() local_unnamed_addr #0 {
|
||||
+; CHECK-LABEL: foo_protect:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: pushq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
+; CHECK-NEXT: .cfi_offset %rbp, -16
|
||||
+; CHECK-NEXT: movq %rsp, %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
||||
+; CHECK-NEXT: movq %rsp, %r11
|
||||
+; CHECK-NEXT: andq $-65536, %r11 # imm = 0xFFFF0000
|
||||
+; CHECK-NEXT: cmpq %rsp, %r11
|
||||
+; CHECK-NEXT: je .LBB1_4
|
||||
+; CHECK-NEXT:# %bb.1:
|
||||
+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
+; CHECK-NEXT: cmpq %rsp, %r11
|
||||
+; CHECK-NEXT: jb .LBB1_3
|
||||
+; CHECK-NEXT:.LBB1_2: # =>This Inner Loop Header: Depth=1
|
||||
+; CHECK-NEXT: movq $0, (%rsp)
|
||||
+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
+; CHECK-NEXT: cmpq %rsp, %r11
|
||||
+; CHECK-NEXT: jb .LBB1_2
|
||||
+; CHECK-NEXT:.LBB1_3:
|
||||
+; CHECK-NEXT: movq %r11, %rsp
|
||||
+; CHECK-NEXT: movq $0, (%rsp)
|
||||
+; CHECK-NEXT:.LBB1_4:
|
||||
+; CHECK-NEXT: movq %rsp, %r11
|
||||
+; CHECK-NEXT: subq $65536, %r11 # imm = 0x10000
|
||||
+; CHECK-NEXT:.LBB1_5: # =>This Inner Loop Header: Depth=1
|
||||
+; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
+; CHECK-NEXT: movq $0, (%rsp)
|
||||
+; CHECK-NEXT: cmpq %r11, %rsp
|
||||
+; CHECK-NEXT: jne .LBB1_5
|
||||
+; CHECK-NEXT:# %bb.6:
|
||||
+; CHECK-NEXT: movl $1, 392(%rsp)
|
||||
+; CHECK-NEXT: movl (%rsp), %eax
|
||||
+; CHECK-NEXT: movq %rbp, %rsp
|
||||
+; CHECK-NEXT: popq %rbp
|
||||
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
|
||||
+; CHECK-NEXT: retq
|
||||
+
|
||||
+
|
||||
+
|
||||
+
|
||||
+ %a = alloca i32, i64 100, align 65536
|
||||
+ %b = getelementptr inbounds i32, i32* %a, i64 98
|
||||
+ store volatile i32 1, i32* %b
|
||||
+ %c = load volatile i32, i32* %a
|
||||
+ ret i32 %c
|
||||
+}
|
||||
+
|
||||
+attributes #0 = {"probe-stack"="inline-asm"}
|
||||
|
||||
From bbe6cbbed8c7460a7e8477373b9250543362e771 Mon Sep 17 00:00:00 2001
|
||||
From: serge-sans-paille <sguelton@redhat.com>
|
||||
Date: Tue, 27 Oct 2020 10:59:42 +0100
|
||||
Subject: [PATCH 3/3] [stack-clash] Fix probing of dynamic alloca
|
||||
|
||||
- Perform the probing in the correct direction.
|
||||
Related to https://github.com/rust-lang/rust/pull/77885#issuecomment-711062924
|
||||
|
||||
- The first touch on a dynamic alloca cannot use a mov because it clobbers
|
||||
existing space. Use a xor 0 instead
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D90216
|
||||
|
||||
(cherry picked from commit 0f60bcc36c34522618bd1425a45f8c6006568fb6)
|
||||
---
|
||||
llvm/lib/Target/X86/X86ISelLowering.cpp | 8 ++++----
|
||||
llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll | 12 ++++++------
|
||||
.../X86/stack-clash-small-alloc-medium-align.ll | 6 +++---
|
||||
3 files changed, 13 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
index f68ae4461fe3..afe470cc6e0b 100644
|
||||
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
|
||||
@@ -31876,7 +31876,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
|
||||
|
||||
BuildMI(testMBB, DL, TII->get(X86::JCC_1))
|
||||
.addMBB(tailMBB)
|
||||
- .addImm(X86::COND_LE);
|
||||
+ .addImm(X86::COND_GE);
|
||||
testMBB->addSuccessor(blockMBB);
|
||||
testMBB->addSuccessor(tailMBB);
|
||||
|
||||
@@ -31892,9 +31892,9 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
|
||||
//
|
||||
// The property we want to enforce is to never have more than [page alloc] between two probes.
|
||||
|
||||
- const unsigned MovMIOpc =
|
||||
- TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi;
|
||||
- addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0)
|
||||
+ const unsigned XORMIOpc =
|
||||
+ TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8;
|
||||
+ addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
|
||||
.addImm(0);
|
||||
|
||||
BuildMI(blockMBB, DL,
|
||||
diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
|
||||
index 82fd67842c8a..6dd8b6ab5897 100644
|
||||
--- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
|
||||
+++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll
|
||||
@@ -24,12 +24,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
|
||||
; CHECK-X86-64-NEXT: andq $-16, %rcx
|
||||
; CHECK-X86-64-NEXT: subq %rcx, %rax
|
||||
; CHECK-X86-64-NEXT: cmpq %rsp, %rax
|
||||
-; CHECK-X86-64-NEXT: jle .LBB0_3
|
||||
+; CHECK-X86-64-NEXT: jge .LBB0_3
|
||||
; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
|
||||
-; CHECK-X86-64-NEXT: movq $0, (%rsp)
|
||||
+; CHECK-X86-64-NEXT: xorq $0, (%rsp)
|
||||
; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
; CHECK-X86-64-NEXT: cmpq %rsp, %rax
|
||||
-; CHECK-X86-64-NEXT: jg .LBB0_2
|
||||
+; CHECK-X86-64-NEXT: jl .LBB0_2
|
||||
; CHECK-X86-64-NEXT: .LBB0_3:
|
||||
; CHECK-X86-64-NEXT: movq %rax, %rsp
|
||||
; CHECK-X86-64-NEXT: movl $1, 4792(%rax)
|
||||
@@ -54,12 +54,12 @@ attributes #0 = {"probe-stack"="inline-asm"}
|
||||
; CHECK-X86-32-NEXT: andl $-16, %ecx
|
||||
; CHECK-X86-32-NEXT: subl %ecx, %eax
|
||||
; CHECK-X86-32-NEXT: cmpl %esp, %eax
|
||||
-; CHECK-X86-32-NEXT: jle .LBB0_3
|
||||
+; CHECK-X86-32-NEXT: jge .LBB0_3
|
||||
; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
|
||||
-; CHECK-X86-32-NEXT: movl $0, (%esp)
|
||||
+; CHECK-X86-32-NEXT: xorl $0, (%esp)
|
||||
; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000
|
||||
; CHECK-X86-32-NEXT: cmpl %esp, %eax
|
||||
-; CHECK-X86-32-NEXT: jg .LBB0_2
|
||||
+; CHECK-X86-32-NEXT: jl .LBB0_2
|
||||
; CHECK-X86-32-NEXT: .LBB0_3:
|
||||
; CHECK-X86-32-NEXT: movl %eax, %esp
|
||||
; CHECK-X86-32-NEXT: movl $1, 4792(%eax)
|
||||
diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
|
||||
index eafa86f1eba9..39b6c3640a60 100644
|
||||
--- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
|
||||
+++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll
|
||||
@@ -106,12 +106,12 @@ define i32 @foo4(i64 %i) local_unnamed_addr #0 {
|
||||
; CHECK-NEXT: andq $-16, %rcx
|
||||
; CHECK-NEXT: subq %rcx, %rax
|
||||
; CHECK-NEXT: cmpq %rsp, %rax
|
||||
-; CHECK-NEXT: jle .LBB3_3
|
||||
+; CHECK-NEXT: jge .LBB3_3
|
||||
; CHECK-NEXT:.LBB3_2: # =>This Inner Loop Header: Depth=1
|
||||
-; CHECK-NEXT: movq $0, (%rsp)
|
||||
+; CHECK-NEXT: xorq $0, (%rsp)
|
||||
; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000
|
||||
; CHECK-NEXT: cmpq %rsp, %rax
|
||||
-; CHECK-NEXT: jg .LBB3_2
|
||||
+; CHECK-NEXT: jl .LBB3_2
|
||||
; CHECK-NEXT:.LBB3_3:
|
||||
; CHECK-NEXT: andq $-64, %rax
|
||||
; CHECK-NEXT: movq %rax, %rsp
|
Loading…
Reference in New Issue
Block a user