126 lines
4.3 KiB
C++
126 lines
4.3 KiB
C++
|
//===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
|
||
|
//
|
||
|
// The LLVM Compiler Infrastructure
|
||
|
//
|
||
|
// This file is distributed under the University of Illinois Open Source
|
||
|
// License. See LICENSE.TXT for details.
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
//
|
||
|
// This provides a class for CUDA code generation targeting the NVIDIA CUDA
|
||
|
// runtime library.
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#include "CGCUDARuntime.h"
|
||
|
#include "CodeGenFunction.h"
|
||
|
#include "CodeGenModule.h"
|
||
|
#include "clang/AST/Decl.h"
|
||
|
#include "llvm/IR/BasicBlock.h"
|
||
|
#include "llvm/IR/Constants.h"
|
||
|
#include "llvm/IR/DerivedTypes.h"
|
||
|
#include "llvm/Support/CallSite.h"
|
||
|
#include <vector>
|
||
|
|
||
|
using namespace clang;
|
||
|
using namespace CodeGen;
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
class CGNVCUDARuntime : public CGCUDARuntime {
|
||
|
|
||
|
private:
|
||
|
llvm::Type *IntTy, *SizeTy;
|
||
|
llvm::PointerType *CharPtrTy, *VoidPtrTy;
|
||
|
|
||
|
llvm::Constant *getSetupArgumentFn() const;
|
||
|
llvm::Constant *getLaunchFn() const;
|
||
|
|
||
|
public:
|
||
|
CGNVCUDARuntime(CodeGenModule &CGM);
|
||
|
|
||
|
void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
|
||
|
};
|
||
|
|
||
|
}
|
||
|
|
||
|
CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) {
|
||
|
CodeGen::CodeGenTypes &Types = CGM.getTypes();
|
||
|
ASTContext &Ctx = CGM.getContext();
|
||
|
|
||
|
IntTy = Types.ConvertType(Ctx.IntTy);
|
||
|
SizeTy = Types.ConvertType(Ctx.getSizeType());
|
||
|
|
||
|
CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
|
||
|
VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
|
||
|
}
|
||
|
|
||
|
llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
|
||
|
// cudaError_t cudaSetupArgument(void *, size_t, size_t)
|
||
|
std::vector<llvm::Type*> Params;
|
||
|
Params.push_back(VoidPtrTy);
|
||
|
Params.push_back(SizeTy);
|
||
|
Params.push_back(SizeTy);
|
||
|
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
|
||
|
Params, false),
|
||
|
"cudaSetupArgument");
|
||
|
}
|
||
|
|
||
|
llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
|
||
|
// cudaError_t cudaLaunch(char *)
|
||
|
std::vector<llvm::Type*> Params;
|
||
|
Params.push_back(CharPtrTy);
|
||
|
return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
|
||
|
Params, false),
|
||
|
"cudaLaunch");
|
||
|
}
|
||
|
|
||
|
void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF,
|
||
|
FunctionArgList &Args) {
|
||
|
// Build the argument value list and the argument stack struct type.
|
||
|
SmallVector<llvm::Value *, 16> ArgValues;
|
||
|
std::vector<llvm::Type *> ArgTypes;
|
||
|
for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end();
|
||
|
I != E; ++I) {
|
||
|
llvm::Value *V = CGF.GetAddrOfLocalVar(*I);
|
||
|
ArgValues.push_back(V);
|
||
|
assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType");
|
||
|
ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType());
|
||
|
}
|
||
|
llvm::StructType *ArgStackTy = llvm::StructType::get(
|
||
|
CGF.getLLVMContext(), ArgTypes);
|
||
|
|
||
|
llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
|
||
|
|
||
|
// Emit the calls to cudaSetupArgument
|
||
|
llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
|
||
|
for (unsigned I = 0, E = Args.size(); I != E; ++I) {
|
||
|
llvm::Value *Args[3];
|
||
|
llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
|
||
|
Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy);
|
||
|
Args[1] = CGF.Builder.CreateIntCast(
|
||
|
llvm::ConstantExpr::getSizeOf(ArgTypes[I]),
|
||
|
SizeTy, false);
|
||
|
Args[2] = CGF.Builder.CreateIntCast(
|
||
|
llvm::ConstantExpr::getOffsetOf(ArgStackTy, I),
|
||
|
SizeTy, false);
|
||
|
llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args);
|
||
|
llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
|
||
|
llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
|
||
|
CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
|
||
|
CGF.EmitBlock(NextBlock);
|
||
|
}
|
||
|
|
||
|
// Emit the call to cudaLaunch
|
||
|
llvm::Constant *cudaLaunchFn = getLaunchFn();
|
||
|
llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
|
||
|
CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
|
||
|
CGF.EmitBranch(EndBlock);
|
||
|
|
||
|
CGF.EmitBlock(EndBlock);
|
||
|
}
|
||
|
|
||
|
CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
|
||
|
return new CGNVCUDARuntime(CGM);
|
||
|
}
|