hsail,gpu-compute: fixes to appease clang++

fixes to appease clang++. tested on:

Ubuntu clang version 3.5.0-4ubuntu2~trusty2
(tags/RELEASE_350/final) (based on LLVM 3.5.0)

Ubuntu clang version 3.6.0-2ubuntu1~trusty1
(tags/RELEASE_360/final) (based on LLVM 3.6.0)

the fixes address the following five issues:

1) the exec continuations in gpu_static_inst.hh were marked
   as protected when they should be public. here we mark
   them as public

2) the Abs instruction uses std::abs() in its execute method.
   because Abs is templated, it can also operate on U32 and U64
   types, which causes Abs::execute() to pass uint32_t and uint64_t
   types to std::abs() respectively. this triggers a warning
   because std::abs() has no effect in this case. to remedy this
   we add template specializations for the execute() method of Abs
   when its template parameter is U32 or U64.

3) Some protocols that utilize the code in cprintf.hh were missing
   includes to BoolVec.hh, which defines operator<< for the BoolVec
   type. This would cause issues when the generated code would try
   to pass a BoolVec type to a method in cprintf.hh that used
   operator<< on an instance of a BoolVec.

4) Surprise, clang doesn't like it when you clobber all the bits
   in a newly allocated object. I.e., this code:

   tlb = new GpuTlbEntry[size];
   std::memset(tlb, 0, sizeof(GpuTlbEntry) * size);

   Let's use std::vector to track the TLB entries in the GpuTlb now...

5) There were a few variables used only in DPRINTFs, so we mark them
   with M5_VAR_USED.
This commit is contained in:
Tony Gutierrez 2016-10-26 22:48:45 -04:00
parent dc16c1ceb8
commit 74249f80df
6 changed files with 59 additions and 11 deletions

View file

@ -776,6 +776,52 @@ gen('Call', base_class='SpecialInstNoSrcNoDest')
# Generate file epilogs
#
###############
header_code('''
template<>
inline void
Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
CType src_val;
src_val = this->src[0].template get<CType>(w, lane);
dest_val = (CType)(src_val);
this->dest.set(w, lane, dest_val);
}
}
}
template<>
inline void
Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
CType src_val;
src_val = this->src[0].template get<CType>(w, lane);
dest_val = (CType)(src_val);
this->dest.set(w, lane, dest_val);
}
}
}
''')
header_code.dedent()
header_code('''
} // namespace HsailISA

View file

@ -73,7 +73,10 @@ static const int ArgumentReg[] = {
INTREG_R8W,
INTREG_R9W
};
static const int NumArgumentRegs = sizeof(ArgumentReg) / sizeof(const int);
static const int NumArgumentRegs M5_VAR_USED =
sizeof(ArgumentReg) / sizeof(const int);
static const int ArgumentReg32[] = {
INTREG_EBX,
INTREG_ECX,
@ -82,7 +85,9 @@ static const int ArgumentReg32[] = {
INTREG_EDI,
INTREG_EBP
};
static const int NumArgumentRegs32 = sizeof(ArgumentReg) / sizeof(const int);
static const int NumArgumentRegs32 M5_VAR_USED =
sizeof(ArgumentReg) / sizeof(const int);
X86LiveProcess::X86LiveProcess(LiveProcessParams * params, ObjectFile *objFile,
SyscallDesc *_syscallDescs, int _numSyscallDescs) :

View file

@ -221,7 +221,6 @@ class GPUStaticInst : public GPUStaticInstFlags
void setFlag(Flags flag) { _flags[flag] = true; }
protected:
virtual void
execLdAcq(GPUDynInstPtr gpuDynInst)
{
@ -246,6 +245,7 @@ class GPUStaticInst : public GPUStaticInstFlags
fatal("calling execAtomicAcq() on a non-atomic instruction.\n");
}
protected:
const std::string opcode;
std::string disassembly;
int _instNum;

View file

@ -71,8 +71,7 @@ namespace X86ISA
accessDistance = p->accessDistance;
clock = p->clk_domain->clockPeriod();
tlb = new GpuTlbEntry[size];
std::memset(tlb, 0, sizeof(GpuTlbEntry) * size);
tlb.assign(size, GpuTlbEntry());
freeList.resize(numSets);
entryList.resize(numSets);
@ -80,7 +79,7 @@ namespace X86ISA
for (int set = 0; set < numSets; ++set) {
for (int way = 0; way < assoc; ++way) {
int x = set * assoc + way;
freeList[set].push_back(&tlb[x]);
freeList[set].push_back(&tlb.at(x));
}
}
@ -133,9 +132,6 @@ namespace X86ISA
{
// make sure all the hash-maps are empty
assert(translationReturnEvent.empty());
// delete the TLB
delete[] tlb;
}
BaseSlavePort&

View file

@ -170,7 +170,7 @@ namespace X86ISA
*/
bool accessDistance;
GpuTlbEntry *tlb;
std::vector<GpuTlbEntry> tlb;
/*
* It's a per-set list. As long as we have not reached

View file

@ -459,6 +459,7 @@ void unset_tbe(${{self.TBEType.c_ident}}*& m_tbe_ptr);
#include <typeinfo>
#include "base/compiler.hh"
#include "mem/ruby/common/BoolVec.hh"
#include "base/cprintf.hh"
''')