minix/minix/llvm/passes/include/magic/MagicPass.h
David van Moolenbroek b7725c8552 Fix mmap leak in malloc code upon state transfer
The NetBSD libc malloc implementation uses a memory-mapped area for
its page directory.  Since the process heap is reconstructed upon
state transfer for live update, this memory-mapped area must not be
transferred to the new process.  However, as the new instance of the
process being updated inherits all memory-mapped areas of the old
instance, it also automatically inherits the malloc implementation's
page directory.  Thus, we must explicitly free this area in order to
avoid a memory leak.

The magic pass already detects (de)allocation functions called from
within other (de)allocation functions, which is why the mmap(2) and
munmap(2) calls of the malloc code are not instrumented as it is.
This patch changes that particular case to allow a different hook
function to be called for such "nested" allocation calls, for a
particular set of nested calls.  In particular, the malloc(3) code's
mmap(2) and munmap(2) calls are replaced with magic_nested_mmap and
magic_nested_munmap calls, respectively.  The magic library then
tracks memory mapping allocations of the malloc code by providing an
implementation for these two wrappers, and frees the allocations upon
state transfer.

This approach was chosen over various alternatives:

- While it appears that nesting could be established by setting a
  flag while the malloc(3) wrapper is active, and testing the flag in
  the mmap(2)/munmap(2) wrappers, this approach would fail to detect
  memory-mapped allocations made from uninstrumented malloc(3) calls,
  and is therefore not a viable option.
- It would be possible to obtain the value of the variables that
  store the information about the memory-mapped area in the malloc
  code.  However, this is rather difficult in practice due to the way
  the libc malloc implementation stores the size of the area, and it
  would make the solution more dependent on the specific libc malloc
  implementation.
- It would be possible to use the special "nested" instrumentation
  for allocations made from certain marked sections.  Since we mark
  the data section of the malloc code already, this would not be hard
  to do.  Switching to this alternative would change very little, and
  if for any reason this approach yields more advantages in the
  future, we can still choose to do so.

Change-Id: Id977405da86a72458dd10f18e076d8460fd2fb75
2015-09-17 14:04:43 +00:00

140 lines
5.9 KiB
C++

#ifndef MAGIC_PASS_H
#define MAGIC_PASS_H
#include <pass.h>
#include <magic/magic.h>
#include <magic/support/Backports.h>
#include <magic/support/MagicUtil.h>
#include <magic/support/SmartType.h>
#include <magic/support/TypeInfo.h>
#include <magic/support/MagicDebugFunction.h>
#include <magic/support/MagicMemFunction.h>
#include <magic/support/MagicMmapCtlFunction.h>
#if MAGIC_USE_QPROF_INSTRUMENTATION
#include <common/qprof_common.h>
#endif
using namespace llvm;
namespace llvm {
// Logging helpers for this pass. Both prefix the message with "MagicPass: "
// and terminate it with "\n".
//
// M is pasted unparenthesized into a `<<` chain, so a caller may pass a
// chained expression such as `"x=" << x`. The flip side is that an argument
// using operators that bind more loosely than `<<` (e.g. `?:`) must be
// parenthesized by the caller.
//
// magicPassLog: streams to dbgs() inside DEBUG(...) -- presumably LLVM's
// debug-only macro, so output is expected only when debug output is enabled
// (TODO confirm against the LLVM version in use).
#define magicPassLog(M) DEBUG(dbgs() << "MagicPass: " << M << "\n")
// magicPassErr: streams to errs() with no guard around it. Expands to a bare
// expression without a trailing semicolon; the caller supplies it.
#define magicPassErr(M) errs() << "MagicPass: " << M << "\n"
// MagicPass: a ModulePass subclass. Only the declaration is visible in this
// header; apart from the inline accessors defined after the class, the member
// functions are implemented elsewhere.
class MagicPass : public ModulePass {
public:
// NOTE(review): presumably the usual LLVM pass identifier whose address
// identifies the pass -- confirm in the implementation file.
static char ID;
MagicPass();
// Read-only accessors for state held by the pass (defined inline after the
// class). The vector-returning accessors return by value, so every call
// copies the underlying vector.
std::vector<GlobalVariable*> getGlobalVariables() const;
std::vector<int> getGlobalVariableSizes() const;
std::vector<GlobalVariable*> getShadowGlobalVariables() const;
std::vector<Function*> getFunctions() const;
// These return the stored GlobalVariable pointers as-is.
GlobalVariable* getMagicArray() const;
GlobalVariable* getMagicTypeArray() const;
GlobalVariable* getMagicFunctionArray() const;
GlobalVariable* getMagicDsindexArray() const;
// Per-module entry point. NOTE(review): presumably the ModulePass hook, with
// the bool result reporting whether the module was modified -- confirm.
virtual bool runOnModule(Module &M);
private:
// Backing storage for the public accessors, plus related bookkeeping.
// NOTE(review): where these are populated is not visible in this header.
std::vector<GlobalVariable*> globalVariables;
std::set<GlobalVariable*> globalVariablesWithAddressTaken;
std::vector<int> globalVariableSizes;
std::vector<GlobalVariable*> shadowGlobalVariables;
std::vector<Function*> functions;
std::vector<TypeInfo*> globalTypeInfos;
// Lookup maps, each followed by an iterator of the matching type that is kept
// as a data member. NOTE(review): these iterators look like scratch state
// shared between member functions -- confirm before relying on their values.
std::map<GlobalValue*, TypeInfo*> globalParentMap;
std::map<GlobalValue*, TypeInfo*>::iterator parentMapIt;
std::map<std::string, GlobalVariable*> stringOwnerMap;
std::map<std::string, GlobalVariable*>::iterator stringOwnerMapIt;
std::map<GlobalVariable*, std::string> stringOwnerInvertedMap;
std::map<GlobalVariable*, std::string>::iterator stringOwnerInvertedMapIt;
// Pointers handed out by getMagicArray() and its sibling accessors.
GlobalVariable* magicArray;
GlobalVariable* magicTypeArray;
GlobalVariable* magicFunctionArray;
GlobalVariable* magicDsindexArray;
// String-valued settings. NOTE(review): presumably filled from pass options
// (library path patterns, void type aliases, memory-management function
// names) -- confirm in the implementation file.
std::vector<std::string> libPathRegexes;
std::vector<std::string> voidTypeAliases;
std::set<std::string> voidTypeAliasesSet;
std::vector<std::string> mmFuncPrefixes;
std::set<std::pair<std::string, std::string> > mmFuncPairs;
std::vector<std::string> mmPoolFunctions;
std::vector<std::string> mmapCtlFunctions;
std::set<std::string>::iterator stringSetIt;
// Function sets named after brk/sbrk; how they are used is not visible here.
std::set<Function*> brkFunctions;
std::set<Function*> sbrkFunctions;
// Regex objects held through raw pointers. NOTE(review): ownership and
// deallocation are not visible in this header -- confirm who frees them.
std::vector<Regex*> magicDataSectionRegexes;
std::vector<Regex*> magicFunctionSectionRegexes;
std::vector<Regex*> extLibSectionRegexes;
// qprofConf exists only when MAGIC_USE_QPROF_INSTRUMENTATION is non-zero, so
// the object layout depends on that macro. The two qprof member functions
// below are declared regardless of it.
#if MAGIC_USE_QPROF_INSTRUMENTATION
QProfConf *qprofConf;
#endif
void qprofInstrumentationInit(Module &M);
void qprofInstrumentationApply(Module &M);
// Helpers working on pointer variables and allocation type information.
// Results are delivered through the non-const reference parameters
// (ptrVars, ptrVarIndexes, allocName, allocParentName, typeInfos).
bool checkPointerVariableIndexes(TYPECONST Type* type, std::vector<int> &ptrVarIndexes, unsigned offset=0);
void findPointerVariables(Function* function, Value *value, std::vector<Value*> &ptrVars, std::vector<std::vector<int> > &ptrVarIndexes, Value *parent = NULL, bool isUser=false);
TypeInfo* typeInfoFromPointerVariables(Module &M, TypeInfo *voidPtrTypeInfo, std::vector<Value*> &ptrVars, std::vector<std::vector<int> > &ptrVarIndexes, std::string &allocName);
TypeInfo* getAllocTypeInfo(Module &M, TypeInfo *voidPtrTypeInfo, const CallSite &CS, std::string &allocName, std::string &allocParentName);
TypeInfo* fillTypeInfos(TypeInfo &sourceTypeInfo, std::vector<TypeInfo*> &typeInfos);
TypeInfo* fillExternalTypeInfos(TYPECONST Type* sourceType, GlobalValue *parent, std::vector<TypeInfo*> &typeInfos);
void printInterestingTypes(TYPECONST TypeInfo *aTypeInfo);
unsigned getMaxRecursiveSequenceLength(TYPECONST TypeInfo *aTypeInfo);
// NOTE(review): selectedArgs is taken by value (copied on each call), unlike
// most container parameters in this class, which are references.
FunctionType* getFunctionType(TYPECONST FunctionType *baseType, std::vector<unsigned> selectedArgs);
bool isCompatibleMagicMemFuncType(TYPECONST FunctionType *type, TYPECONST FunctionType* magicType);
// NOTE(review): magicMemPrefixes is a raw std::string pointer with no length
// parameter; whether it is one string or an array is not visible here.
Function* findWrapper(Module &M, std::string *magicMemPrefixes, Function *f, std::string fName);
void indexCasts(Module &M, User *U, std::vector<TYPECONST Type*> &intCastTypes, std::vector<int> &intCastValues, std::map<TYPECONST Type*, std::set<TYPECONST Type*> > &bitcastMap);
// Helpers related to stack instrumentation and per-function local type info.
void fillStackInstrumentedFunctions(std::vector<Function*> &stackIntrumentedFuncs, Function *deepestLLFunction);
void indexLocalTypeInfos(Module &M, Function *F, std::map<AllocaInst*, std::pair<TypeInfo*, std::string> > &localMap);
// NOTE(review): localTypeInfoMap is passed by value here, so the whole map is
// copied per call; every other container parameter of this function is a
// reference. Changing it would require touching the out-of-line definition.
void addMagicStackDsentryFuncCalls(Module &M, Function *insertCallsInFunc, Function *localsFromFunc, Function *dsentryCreateFunc, Function *dsentryDestroyFunc, TYPECONST StructType *dsentryStructType, std::map<AllocaInst*, std::pair<TypeInfo*, std::string> > localTypeInfoMap, std::map<TypeInfo*, Constant*> &magicArrayTypePtrMap, TypeInfo *voidPtrTypeInfo, std::vector<TypeInfo*> &typeInfoList, std::vector<std::pair<std::string, std::string> > &namesList, std::vector<int> &flagsList);
// Classification predicates for globals and functions.
bool isExtLibrary(GlobalValue *GV, DIDescriptor *DID);
bool isMagicGV(Module &M, GlobalVariable *GV);
bool isMagicFunction(Module &M, Function *F);
};
inline std::vector<GlobalVariable*> MagicPass::getGlobalVariables() const {
return globalVariables;
}
inline std::vector<int> MagicPass::getGlobalVariableSizes() const {
return globalVariableSizes;
}
inline std::vector<GlobalVariable*> MagicPass::getShadowGlobalVariables() const {
return shadowGlobalVariables;
}
inline std::vector<Function*> MagicPass::getFunctions() const {
return functions;
}
// Accessor: returns the stored magicArray pointer unchanged.
inline GlobalVariable* MagicPass::getMagicArray() const
{
    return this->magicArray;
}
// Accessor: returns the stored magicTypeArray pointer unchanged.
inline GlobalVariable* MagicPass::getMagicTypeArray() const
{
    return this->magicTypeArray;
}
// Accessor: returns the stored magicFunctionArray pointer unchanged.
inline GlobalVariable* MagicPass::getMagicFunctionArray() const
{
    return this->magicFunctionArray;
}
// Accessor: returns the stored magicDsindexArray pointer unchanged.
inline GlobalVariable* MagicPass::getMagicDsindexArray() const
{
    return this->magicDsindexArray;
}
}
#endif