1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
/// \file
/// R600EmitClauseMarker pass emits CFAlu instructions in a conservative manner.
/// This pass merges consecutive CFAlus where applicable.
/// It needs to be called after IfCvt for best results.
14 //===----------------------------------------------------------------------===//
15
16 #define DEBUG_TYPE "r600mergeclause"
17 #include "AMDGPU.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
20 #include "R600MachineFunctionInfo.h"
21 #include "R600RegisterInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
27
28 using namespace llvm;
29
30 namespace {
31
isCFAlu(const MachineInstr * MI)32 static bool isCFAlu(const MachineInstr *MI) {
33 switch (MI->getOpcode()) {
34 case AMDGPU::CF_ALU:
35 case AMDGPU::CF_ALU_PUSH_BEFORE:
36 return true;
37 default:
38 return false;
39 }
40 }
41
42 class R600ClauseMergePass : public MachineFunctionPass {
43
44 private:
45 static char ID;
46 const R600InstrInfo *TII;
47
48 unsigned getCFAluSize(const MachineInstr *MI) const;
49 bool isCFAluEnabled(const MachineInstr *MI) const;
50
51 /// IfCvt pass can generate "disabled" ALU clause marker that need to be
52 /// removed and their content affected to the previous alu clause.
53 /// This function parse instructions after CFAlu untill it find a disabled
54 /// CFAlu and merge the content, or an enabled CFAlu.
55 void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
56
57 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
58 /// it is the case.
59 bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
60 const;
61
62 public:
R600ClauseMergePass(TargetMachine & tm)63 R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
64
65 virtual bool runOnMachineFunction(MachineFunction &MF);
66
67 const char *getPassName() const;
68 };
69
70 char R600ClauseMergePass::ID = 0;
71
getCFAluSize(const MachineInstr * MI) const72 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
73 assert(isCFAlu(MI));
74 return MI->getOperand(
75 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
76 }
77
isCFAluEnabled(const MachineInstr * MI) const78 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
79 assert(isCFAlu(MI));
80 return MI->getOperand(
81 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
82 }
83
cleanPotentialDisabledCFAlu(MachineInstr * CFAlu) const84 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
85 const {
86 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
87 MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
88 I++;
89 do {
90 while (I!= E && !isCFAlu(I))
91 I++;
92 if (I == E)
93 return;
94 MachineInstr *MI = I++;
95 if (isCFAluEnabled(MI))
96 break;
97 CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
98 MI->eraseFromParent();
99 } while (I != E);
100 }
101
mergeIfPossible(MachineInstr * RootCFAlu,const MachineInstr * LatrCFAlu) const102 bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
103 const MachineInstr *LatrCFAlu) const {
104 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
105 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
106 unsigned RootInstCount = getCFAluSize(RootCFAlu),
107 LaterInstCount = getCFAluSize(LatrCFAlu);
108 unsigned CumuledInsts = RootInstCount + LaterInstCount;
109 if (CumuledInsts >= TII->getMaxAlusPerClause()) {
110 DEBUG(dbgs() << "Excess inst counts\n");
111 return false;
112 }
113 if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
114 return false;
115 // Is KCache Bank 0 compatible ?
116 int Mode0Idx =
117 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
118 int KBank0Idx =
119 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
120 int KBank0LineIdx =
121 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
122 if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
123 RootCFAlu->getOperand(Mode0Idx).getImm() &&
124 (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
125 RootCFAlu->getOperand(KBank0Idx).getImm() ||
126 LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
127 RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
128 DEBUG(dbgs() << "Wrong KC0\n");
129 return false;
130 }
131 // Is KCache Bank 1 compatible ?
132 int Mode1Idx =
133 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
134 int KBank1Idx =
135 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
136 int KBank1LineIdx =
137 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
138 if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
139 RootCFAlu->getOperand(Mode1Idx).getImm() &&
140 (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
141 RootCFAlu->getOperand(KBank1Idx).getImm() ||
142 LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
143 RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
144 DEBUG(dbgs() << "Wrong KC0\n");
145 return false;
146 }
147 if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
148 RootCFAlu->getOperand(Mode0Idx).setImm(
149 LatrCFAlu->getOperand(Mode0Idx).getImm());
150 RootCFAlu->getOperand(KBank0Idx).setImm(
151 LatrCFAlu->getOperand(KBank0Idx).getImm());
152 RootCFAlu->getOperand(KBank0LineIdx).setImm(
153 LatrCFAlu->getOperand(KBank0LineIdx).getImm());
154 }
155 if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
156 RootCFAlu->getOperand(Mode1Idx).setImm(
157 LatrCFAlu->getOperand(Mode1Idx).getImm());
158 RootCFAlu->getOperand(KBank1Idx).setImm(
159 LatrCFAlu->getOperand(KBank1Idx).getImm());
160 RootCFAlu->getOperand(KBank1LineIdx).setImm(
161 LatrCFAlu->getOperand(KBank1LineIdx).getImm());
162 }
163 RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
164 RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
165 return true;
166 }
167
runOnMachineFunction(MachineFunction & MF)168 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
169 TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
170 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
171 BB != BB_E; ++BB) {
172 MachineBasicBlock &MBB = *BB;
173 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
174 MachineBasicBlock::iterator LatestCFAlu = E;
175 while (I != E) {
176 MachineInstr *MI = I++;
177 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
178 TII->mustBeLastInClause(MI->getOpcode()))
179 LatestCFAlu = E;
180 if (!isCFAlu(MI))
181 continue;
182 cleanPotentialDisabledCFAlu(MI);
183
184 if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
185 MI->eraseFromParent();
186 } else {
187 assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
188 LatestCFAlu = MI;
189 }
190 }
191 }
192 return false;
193 }
194
getPassName() const195 const char *R600ClauseMergePass::getPassName() const {
196 return "R600 Merge Clause Markers Pass";
197 }
198
199 } // end anonymous namespace
200
201
createR600ClauseMergePass(TargetMachine & TM)202 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
203 return new R600ClauseMergePass(TM);
204 }
205