From 05298c5b548744c6a180e7f77cff807a61f17111 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 24 Mar 2024 13:02:33 +0100 Subject: [PATCH 01/33] Add feature-taint analysis (WIP) --- .../Problems/IDEFeatureTaintAnalysis.h | 300 ++++++++++ include/phasar/Utils/BitVectorSet.h | 25 +- include/phasar/Utils/TypeTraits.h | 10 + .../Problems/IDEFeatureTaintAnalysis.cpp | 566 ++++++++++++++++++ 4 files changed, 895 insertions(+), 6 deletions(-) create mode 100644 include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h create mode 100644 lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h new file mode 100644 index 0000000000..b07c745dd0 --- /dev/null +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h @@ -0,0 +1,300 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEFEATURETAINTANALYSIS_H +#define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEFEATURETAINTANALYSIS_H + +#include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/Utils/BitVectorSet.h" +#include "phasar/Utils/JoinLattice.h" +#include "phasar/Utils/TypeTraits.h" + +#include "llvm/ADT/FunctionExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" + +#include +#include +#include +#include +#include + +namespace llvm { +class GlobalVariable; +} // namespace llvm + +namespace psr { +class LLVMProjectIRDB; + +struct IDEFeatureTaintEdgeFact { + llvm::SmallBitVector Taints{}; + + static llvm::SmallBitVector fromBits(uintptr_t Bits) { +#if __has_builtin(__builtin_constant_p) + if (__builtin_constant_p(Bits) && Bits == 0) { + return {}; + } +#endif + + llvm::SmallBitVector Ret(llvm::findLastSet(Bits) + 1); + Ret.setBitsInMask((const uint32_t *)&Bits, sizeof(Bits)); + return Ret; + } + + IDEFeatureTaintEdgeFact(llvm::SmallBitVector &&Taints) noexcept + : Taints(std::move(Taints)) {} + IDEFeatureTaintEdgeFact(const llvm::SmallBitVector &Taints) + : Taints(Taints) {} + IDEFeatureTaintEdgeFact(uintptr_t Taints) noexcept + : Taints(fromBits(Taints)) {} + + void unionWith(uintptr_t Facts) { + auto RequiredSize = llvm::findLastSet(Facts) + 1; + if (RequiredSize > Taints.size()) { + Taints.resize(RequiredSize); + } + Taints.setBitsInMask((const uint32_t *)&Facts, sizeof(Facts)); + } + void unionWith(const IDEFeatureTaintEdgeFact &Facts) { + Taints |= Facts.Taints; + } + + [[nodiscard]] inline bool isBottom() const noexcept { + // TODO + return false; + } + + friend llvm::hash_code 
+ hash_value(const IDEFeatureTaintEdgeFact &BV) noexcept { + if (BV.Taints.none()) { + return {}; + } + uintptr_t Buf; + auto Words = BV.Taints.getData(Buf); + size_t Idx = Words.size(); + while (Idx && Words[Idx - 1] == 0) { + --Idx; + } + return llvm::hash_combine_range(Words.begin(), + std::next(Words.begin(), Idx)); + } + + friend bool operator==(const IDEFeatureTaintEdgeFact &Lhs, + const IDEFeatureTaintEdgeFact &Rhs) { + bool LeftEmpty = Lhs.Taints.none(); + bool RightEmpty = Rhs.Taints.none(); + if (LeftEmpty || RightEmpty) { + return LeftEmpty == RightEmpty; + } + // Check whether Lhs and Rhs actually have the same bits set and not + // whether their internal representation is exactly identical + + uintptr_t LBuf, RBuf; + auto LhsWords = Lhs.Taints.getData(LBuf); + auto RhsWords = Rhs.Taints.getData(RBuf); + if (LhsWords.size() == RhsWords.size()) { + return LhsWords == RhsWords; + } + auto MinSize = std::min(LhsWords.size(), RhsWords.size()); + if (LhsWords.slice(0, MinSize) != RhsWords.slice(0, MinSize)) { + return false; + } + auto Rest = (LhsWords.size() > RhsWords.size() ? 
LhsWords : RhsWords) + .slice(MinSize); + return std::all_of(Rest.begin(), Rest.end(), + [](auto Word) { return Word == 0; }); + } + + template [[nodiscard]] std::string str() { + auto BV = BitVectorSet::fromBits(Taints); + return LToString(BV); + } +}; + +template <> struct JoinLatticeTraits { + inline static IDEFeatureTaintEdgeFact top() { return 0; } + inline static IDEFeatureTaintEdgeFact bottom() { + // TODO + return 0; + } + inline static IDEFeatureTaintEdgeFact join(const IDEFeatureTaintEdgeFact &L, + const IDEFeatureTaintEdgeFact &R) { + auto Ret = L; + Ret.Taints |= R.Taints; + return Ret; + } +}; + +struct IDEFeatureTaintAnalysisDomain : LLVMAnalysisDomainDefault { + using l_t = IDEFeatureTaintEdgeFact; +}; + +class FeatureTaintGenerator { +public: + using InstOrGlobal = + std::variant; + + using GenerateTaintsFn = + llvm::unique_function; + using IsSourceFn = llvm::unique_function; + using PrinterFn = + llvm::unique_function; + + template + static GenerateTaintsFn createGenerateTaints(EdgeFactGenerator &&EFGen) { + return [EFGen{std::forward(EFGen)}](InstOrGlobal IG) { + const auto &TaintSet = std::invoke(EFGen, IG); + BitVectorSet> BV( + llvm::adl_begin(TaintSet), llvm::adl_end(TaintSet)); + return IDEFeatureTaintEdgeFact{std::move(BV).getBits()}; + }; + } + + template + static PrinterFn createEdgeFactPrinter() { + using ElemTy = + ElementType>; + return [](const IDEFeatureTaintEdgeFact &Fact) { + auto BV = + BitVectorSet::fromBits(Fact.Taints); + return LToString(BV); + }; + } + + FeatureTaintGenerator(IsSourceFn IsFeatureSource, + GenerateTaintsFn GenerateTaints, PrinterFn Printer) + : IsFeatureSource(std::move(IsFeatureSource)), + GenerateTaints(std::move(GenerateTaints)), Printer(std::move(Printer)) { + } + + template >>> + FeatureTaintGenerator(EdgeFactGenerator &&EFGen) + : IsFeatureSource([EFGen{EFGen}](InstOrGlobal IG) { + return !llvm::empty(std::invoke(EFGen, IG)); + }), + GenerateTaints( + createGenerateTaints(std::forward(EFGen))), + 
Printer(createEdgeFactPrinter()) {} + + template + FeatureTaintGenerator(SourceDetector &&SrcDetector, EdgeFactGenerator &&EFGen) + : IsFeatureSource(std::forward(SrcDetector)), + GenerateTaints( + createGenerateTaints(std::forward(EFGen))), + Printer(createEdgeFactPrinter()) {} + + [[nodiscard]] bool isSource(InstOrGlobal IG) const { + return IsFeatureSource(IG); + } + + [[nodiscard]] IDEFeatureTaintEdgeFact + getGeneratedTaintsAt(InstOrGlobal IG) const { + return GenerateTaints(IG); + } + + [[nodiscard]] std::string + toString(const IDEFeatureTaintEdgeFact &Fact) const { + return Printer(Fact); + } + +private: + IsSourceFn IsFeatureSource; + GenerateTaintsFn GenerateTaints; + PrinterFn Printer; +}; + +class IDEFeatureTaintAnalysis + : public IDETabulationProblem { + +public: + IDEFeatureTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, + std::vector EntryPoints, + FeatureTaintGenerator &&TaintGen); + + template + IDEFeatureTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, + std::vector EntryPoints, + EdgeFactGenerator &&EFGen) + : IDEFeatureTaintAnalysis( + IRDB, PT, std::move(EntryPoints), + FeatureTaintGenerator(std::forward(EFGen))) {} + + template + IDEFeatureTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, + std::vector EntryPoints, + SourceDetector &&SrcDetector, + EdgeFactGenerator &&EFGen) + : IDEFeatureTaintAnalysis( + IRDB, PT, std::move(EntryPoints), + FeatureTaintGenerator(std::forward(SrcDetector), + std::forward(EFGen))) {} + + ////////////////////////////////////////////////////////////////////////////// + /// Flow Functions + ////////////////////////////////////////////////////////////////////////////// + + FlowFunctionPtrType getNormalFlowFunction(n_t Curr, n_t Succ) override; + + FlowFunctionPtrType getCallFlowFunction(n_t CallSite, f_t DestFun) override; + + FlowFunctionPtrType getRetFlowFunction(n_t CallSite, f_t CalleeFun, + n_t ExitInst, n_t RetSite) override; + FlowFunctionPtrType + 
getCallToRetFlowFunction(n_t CallSite, n_t RetSite, + llvm::ArrayRef Callees) override; + // FlowFunctionPtrType getSummaryFlowFunction(n_t CallSite, + // f_t DestFun) override; + + ////////////////////////////////////////////////////////////////////////////// + /// Edge Functions + ////////////////////////////////////////////////////////////////////////////// + + EdgeFunction getNormalEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, + d_t SuccNode) override; + + EdgeFunction getCallEdgeFunction(n_t CallSite, d_t SrcNode, + f_t DestinationFunction, + d_t DestNode) override; + + EdgeFunction getReturnEdgeFunction(n_t CallSite, f_t CalleeFunction, + n_t ExitStmt, d_t ExitNode, + n_t RetSite, d_t RetNode) override; + + EdgeFunction + getCallToRetEdgeFunction(n_t CallSite, d_t CallNode, n_t RetSite, + d_t RetSiteNode, + llvm::ArrayRef Callees) override; + + // EdgeFunction getSummaryEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, + // d_t SuccNode) override; + + ////////////////////////////////////////////////////////////////////////////// + /// Misc + ////////////////////////////////////////////////////////////////////////////// + + InitialSeeds initialSeeds() override; + + bool isZeroValue(d_t FlowFact) const noexcept override; + + void emitTextReport(const SolverResults &SR, + llvm::raw_ostream &OS = llvm::outs()) override; + +private: + FeatureTaintGenerator TaintGen; + LLVMAliasInfoRef PT; +}; + +} // namespace psr + +#endif // PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEFEATURETAINTANALYSIS_H diff --git a/include/phasar/Utils/BitVectorSet.h b/include/phasar/Utils/BitVectorSet.h index 418d90e603..d1accc77c1 100644 --- a/include/phasar/Utils/BitVectorSet.h +++ b/include/phasar/Utils/BitVectorSet.h @@ -56,19 +56,22 @@ inline bool isLess(const llvm::BitVector &Lhs, const llvm::BitVector &Rhs) { * * @brief Implements a set that requires minimal space. 
*/ -template class BitVectorSet { -private: +template +class BitVectorSet { +public: // Using boost::hash causes ambiguity for hash_value(): // - // - // - using bimap_t = boost::bimap>, boost::bimaps::unordered_set_of>; + +private: inline static bimap_t Position; // NOLINT - llvm::BitVector Bits; + BitVectorTy Bits; template class BitVectorSetIterator { - llvm::BitVector Bits; + BitVectorTy Bits; public: using iterator_category = std::forward_iterator_tag; @@ -83,7 +86,7 @@ template class BitVectorSet { return *this; } - void setBits(const llvm::BitVector &OtherBits) { Bits = OtherBits; } + void setBits(const BitVectorTy &OtherBits) { Bits = OtherBits; } bool operator==(const BitVectorSetIterator &OtherIterator) const { return PosPtr == OtherIterator.getPtr(); @@ -152,7 +155,7 @@ template class BitVectorSet { // T getVal() {return pos_ptr->second;} - [[nodiscard]] llvm::BitVector getBits() const { return Bits; } + [[nodiscard]] BitVectorTy getBits() const { return Bits; } private: D PosPtr; @@ -176,6 +179,12 @@ template class BitVectorSet { insert(First, Last); } + static BitVectorSet fromBits(BitVectorTy Bits) { + BitVectorSet Ret; + Ret.Bits = std::move(Bits); + return Ret; + } + [[nodiscard]] BitVectorSet setUnion(const BitVectorSet &Other) const { size_t MaxSize = std::max(Bits.size(), Other.Bits.size()); BitVectorSet Res; @@ -271,6 +280,10 @@ template class BitVectorSet { [[nodiscard]] size_t size() const noexcept { return Bits.count(); } + [[nodiscard]] const BitVectorTy &getBits() const &noexcept { return Bits; } + [[nodiscard]] BitVectorTy &getBits() &noexcept { return Bits; } + [[nodiscard]] BitVectorTy &&getBits() &&noexcept { return std::move(Bits); } + friend bool operator==(const BitVectorSet &Lhs, const BitVectorSet &Rhs) { bool LeftEmpty = Lhs.empty(); bool RightEmpty = Rhs.empty(); diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index f4c6736f28..141270dfa8 100644 --- a/include/phasar/Utils/TypeTraits.h +++ 
b/include/phasar/Utils/TypeTraits.h @@ -13,6 +13,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -170,6 +171,12 @@ struct variant_idx, T> size_t, std::variant...>(type_identity{}).index()> {}; +template struct ElementType { + using IteratorTy = + std::decay_t()))>; + using type = typename std::iterator_traits::value_type; +}; + } // namespace detail template @@ -248,6 +255,9 @@ template using type_identity_t = typename type_identity::type; template static constexpr size_t variant_idx = detail::variant_idx::value; +template +using ElementType = typename detail::ElementType::type; + struct TrueFn { template [[nodiscard]] bool operator()(const Args &.../*unused*/) const noexcept { diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp new file mode 100644 index 0000000000..3dd3c6aad0 --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -0,0 +1,566 @@ +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h" + +#include "phasar/DataFlow/IfdsIde/EdgeFunction.h" +#include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/BitVectorSet.h" +#include "phasar/Utils/ByRef.h" +#include "phasar/Utils/Printer.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" + +#include +#include + +using namespace psr; + +using l_t = 
IDEFeatureTaintAnalysisDomain::l_t; +using d_t = IDEFeatureTaintAnalysisDomain::d_t; + +IDEFeatureTaintAnalysis::IDEFeatureTaintAnalysis( + const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, + std::vector EntryPoints, FeatureTaintGenerator &&TaintGen) + : IDETabulationProblem( + IRDB, std::move(EntryPoints), LLVMZeroValue::getInstance()), + TaintGen(std::move(TaintGen)), PT(PT) {} + +auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) + -> FlowFunctionPtrType { + bool GeneratesFact = TaintGen.isSource(Curr); + + if (const auto *Load = llvm::dyn_cast(Curr)) { + return generateFlowIf( + Load, + [PointerOp = Load->getPointerOperand(), + PTS = PT.getReachableAllocationSites(Load->getPointerOperand(), true)]( + d_t Src) { return Src == PointerOp || PTS->count(Src); }); + } + + if (const auto *Store = llvm::dyn_cast(Curr)) { + return lambdaFlow([Store, + PointerPTS = PT.getReachableAllocationSites( + Store->getPointerOperand(), true, Store), + GeneratesFact](d_t Src) -> container_type { + if (Store->getPointerOperand() == Src || PointerPTS->count(Src)) { + // Here, we are unsound! + return {}; + } + container_type Facts; + Facts.insert(Src); + // y/Y now obtains its new value(s) from x/X + // If a value is stored that holds we must generate all potential + // memory locations the store might write to. + if (Store->getValueOperand() == Src) { + Facts.insert(Store->getPointerOperand()); + Facts.insert(PointerPTS->begin(), PointerPTS->end()); + } + // ... 
or from zero, if we manually generate a fact here + if (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src)) { + Facts.insert(Store->getPointerOperand()); + Facts.insert(PointerPTS->begin(), PointerPTS->end()); + } + return Facts; + }); + } + + // Fallback + return lambdaFlow([Inst = Curr](d_t Src) { + container_type Facts; + Facts.insert(Src); + if (LLVMZeroValue::isLLVMZeroValue(Src)) { + // keep the zero flow fact + return Facts; + } + + // continue syntactic propagation: populate and propagate other existing + // facts + for (const auto &Op : Inst->operands()) { + // if one of the operands holds, also generate the instruction using + // it + if (Op == Src) { + Facts.insert(Inst); + } + } + return Facts; + }); +} + +auto IDEFeatureTaintAnalysis::getCallFlowFunction(n_t CallSite, f_t DestFun) + -> FlowFunctionPtrType { + + if (DestFun->isDeclaration()) { + // We don't have anything that we could analyze, kill all facts. + return killAllFlows(); + } + + const auto *CS = llvm::cast(CallSite); + + // Map actual to formal parameters. + return mapFactsToCallee( + CS, DestFun, [CS](const llvm::Value *ActualArg, ByConstRef Src) { + if (d_t(ActualArg) != Src) { + return false; + } + + if (CS->hasStructRetAttr() && ActualArg == CS->getArgOperand(0)) { + return false; + } + + return true; + }); +} + +auto IDEFeatureTaintAnalysis::getRetFlowFunction(n_t CallSite, + f_t /*CalleeFun*/, + n_t ExitInst, + n_t /* RetSite */) + -> FlowFunctionPtrType { + // Map return value back to the caller. If pointer parameters hold at the + // end of a callee function generate all of those in the caller context. 
+ if (CallSite == nullptr) { + return killAllFlows(); + } + + bool GeneratesFact = TaintGen.isSource(ExitInst); + return mapFactsToCaller( + llvm::cast(CallSite), ExitInst, {}, + [GeneratesFact](const llvm::Value *RetVal, d_t Src) { + if (Src == RetVal) { + return true; + } + if (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src)) { + return true; + } + return false; + }); +} + +auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( + n_t CallSite, n_t /* RetSite */, llvm::ArrayRef Callees) + -> FlowFunctionPtrType { + + bool GeneratesFact = + !CallSite->getType()->isVoidTy() && TaintGen.isSource(CallSite); + + if (llvm::all_of(Callees, [](f_t Fun) { return Fun->isDeclaration(); })) { + if (GeneratesFact) { + return generateFromZero(CallSite); + } + return identityFlow(); + } + + auto Mapper = mapFactsAlongsideCallSite( + llvm::cast(CallSite), + [](d_t Arg) { return !Arg->getType()->isPointerTy(); }, + /*PropagateGlobals*/ false); + + if (GeneratesFact) { + return unionFlows(std::move(Mapper), + generateFlowAndKillAllOthers(CallSite, getZeroValue())); + } + return Mapper; +} + +namespace { + +struct AddFactsEF { + using l_t = IDEFeatureTaintAnalysisDomain::l_t; + + IDEFeatureTaintEdgeFact Facts; + + [[nodiscard]] l_t computeTarget(l_t Source) const { + Source.Taints |= Facts.Taints; + return Source; + } + + static EdgeFunction compose(EdgeFunctionRef This, + const EdgeFunction &SecondFunction); + + static EdgeFunction join(EdgeFunctionRef This, + const EdgeFunction &OtherFunction); + + friend bool operator==(const AddFactsEF &L, const AddFactsEF &R) { + return L.Facts == R.Facts; + } + + // NOLINTNEXTLINE(readability-identifier-naming) -- needed for ADL + friend llvm::hash_code hash_value(const AddFactsEF &EF) { + return hash_value(EF.Facts); + } +}; + +struct GenerateEF { + using l_t = IDEFeatureTaintAnalysisDomain::l_t; + + IDEFeatureTaintEdgeFact Facts; + + [[nodiscard]] bool isConstant() const noexcept { return true; } + + [[nodiscard]] l_t computeTarget(ByConstRef 
/*Source*/) const { + return Facts; + } + + static EdgeFunction compose(EdgeFunctionRef This, + const EdgeFunction &SecondFunction); + + static EdgeFunction join(EdgeFunctionRef This, + const EdgeFunction &OtherFunction); + + friend bool operator==(const GenerateEF &L, const GenerateEF &R) { + return L.Facts == R.Facts; + } + + // NOLINTNEXTLINE(readability-identifier-naming) -- needed for ADL + friend llvm::hash_code hash_value(const GenerateEF &EF) { + return hash_value(EF.Facts); + } +}; + +struct AddSmallFactsEF { + using l_t = IDEFeatureTaintAnalysisDomain::l_t; + + uintptr_t Facts{}; + + [[nodiscard]] l_t computeTarget(l_t Source) const { + Source.unionWith(Facts); + return Source; + } + + static EdgeFunction compose(EdgeFunctionRef This, + const EdgeFunction &SecondFunction); + + static EdgeFunction join(EdgeFunctionRef This, + const EdgeFunction &OtherFunction); + + friend bool operator==(const AddSmallFactsEF &L, const AddSmallFactsEF &R) { + return L.Facts == R.Facts; + } + + // NOLINTNEXTLINE(readability-identifier-naming) -- needed for ADL + friend llvm::hash_code hash_value(const AddSmallFactsEF &EF) { + return llvm::hash_value(EF.Facts); + } +}; + +struct GenerateSmallEF { + using l_t = IDEFeatureTaintAnalysisDomain::l_t; + + uintptr_t Facts{}; + + [[nodiscard]] bool isConstant() const noexcept { return true; } + + [[nodiscard]] l_t computeTarget(ByConstRef /*Source*/) const { + return Facts; + } + + static EdgeFunction compose(EdgeFunctionRef This, + const EdgeFunction &SecondFunction); + + static EdgeFunction join(EdgeFunctionRef This, + const EdgeFunction &OtherFunction); + + // NOLINTNEXTLINE(readability-identifier-naming) -- needed for ADL + friend llvm::hash_code hash_value(const GenerateSmallEF &EF) { + return llvm::hash_value(EF.Facts); + } + + friend bool operator==(GenerateSmallEF L, GenerateSmallEF R) { + return L.Facts == R.Facts; + } +}; + +auto GenerateSmallEF::compose(EdgeFunctionRef This, + const EdgeFunction &SecondFunction) + -> 
EdgeFunction { + if (auto Default = defaultComposeOrNull(This, SecondFunction)) { + return Default; + } + + auto Val = SecondFunction.computeTarget(This->computeTarget(0)); + + if (Val.Taints.isSmall()) { + uintptr_t Buf{}; + std::ignore = Val.Taints.getData(Buf); + return GenerateSmallEF{Buf}; + } + + // TODO: Caching + + return GenerateEF{std::move(Val)}; +} + +auto AddSmallFactsEF::compose(EdgeFunctionRef This, + const EdgeFunction &SecondFunction) + -> EdgeFunction { + if (auto Default = defaultComposeOrNull(This, SecondFunction)) { + return Default; + } + + auto Val = SecondFunction.computeTarget(This->computeTarget(0)); + + if (Val.Taints.isSmall()) { + uintptr_t Buf{}; + std::ignore = Val.Taints.getData(Buf); + return AddSmallFactsEF{Buf}; + } + + // TODO: Caching + + return AddFactsEF{std::move(Val)}; +} + +auto GenerateEF::compose(EdgeFunctionRef This, + const EdgeFunction &SecondFunction) + -> EdgeFunction { + if (auto Default = defaultComposeOrNull(This, SecondFunction)) { + return Default; + } + + auto Val = SecondFunction.computeTarget(This->computeTarget(0)); + + // TODO: Caching + + return GenerateEF{std::move(Val)}; +} + +auto AddFactsEF::compose(EdgeFunctionRef This, + const EdgeFunction &SecondFunction) + -> EdgeFunction { + if (auto Default = defaultComposeOrNull(This, SecondFunction)) { + return Default; + } + + auto Val = SecondFunction.computeTarget(This->computeTarget(0)); + + // TODO: Caching + + return AddFactsEF{std::move(Val)}; +} + +template +EdgeFunction joinWithGen(EdgeFunctionRef This, + const EdgeFunction &OtherFunction) { + if (auto Default = defaultJoinOrNull(This, OtherFunction)) { + return Default; + } + + auto OtherFacts = OtherFunction.computeTarget(0); + OtherFacts.unionWith(This->Facts); + + if (OtherFacts.Taints.isSmall()) { + uintptr_t Buf{}; + std::ignore = OtherFacts.Taints.getData(Buf); + + if (OtherFunction.isConstant()) { + return GenerateSmallEF{Buf}; + } + + return AddSmallFactsEF{Buf}; + } + + // TODO: Caching + + 
if (OtherFunction.isConstant()) { + return GenerateEF{std::move(OtherFacts)}; + } + + return AddFactsEF{std::move(OtherFacts)}; +} + +template +EdgeFunction joinWithAdd(EdgeFunctionRef This, + const EdgeFunction &OtherFunction) { + /// XXX: Here, we underapproximate joins with EdgeIdentity + if (llvm::isa>(OtherFunction)) { + return This; + } + + if (auto Default = defaultJoinOrNull(This, OtherFunction)) { + return Default; + } + + auto OtherFacts = OtherFunction.computeTarget(0); + OtherFacts.unionWith(This->Facts); + + if (OtherFacts.Taints.isSmall()) { + uintptr_t Buf{}; + std::ignore = OtherFacts.Taints.getData(Buf); + + return AddSmallFactsEF{Buf}; + } + + // TODO: Caching + + return AddFactsEF{std::move(OtherFacts)}; +} + +auto GenerateSmallEF::join(EdgeFunctionRef This, + const EdgeFunction &OtherFunction) + -> EdgeFunction { + return joinWithGen(This, OtherFunction); +} +auto GenerateEF::join(EdgeFunctionRef This, + const EdgeFunction &OtherFunction) + -> EdgeFunction { + return joinWithGen(This, OtherFunction); +} + +auto AddSmallFactsEF::join(EdgeFunctionRef This, + const EdgeFunction &OtherFunction) + -> EdgeFunction { + return joinWithAdd(This, OtherFunction); +} + +auto AddFactsEF::join(EdgeFunctionRef This, + const EdgeFunction &OtherFunction) + -> EdgeFunction { + return joinWithAdd(This, OtherFunction); +} + +/// + +EdgeFunction genEF(l_t &&Facts) { + if (Facts.Taints.isSmall()) { + uintptr_t Buf{}; + std::ignore = Facts.Taints.getData(Buf); + return GenerateSmallEF{Buf}; + } + return GenerateEF{std::move(Facts)}; +} + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +auto IDEFeatureTaintAnalysis::getNormalEdgeFunction(n_t Curr, d_t CurrNode, + n_t /* Succ */, + d_t SuccNode) + -> EdgeFunction { + + if (isZeroValue(SuccNode) || CurrNode == SuccNode) { + // We don't want to propagate any facts on zero + return EdgeIdentity{}; + } + + if (isZeroValue(CurrNode)) { + // Generate user edge-facts from 
zero + return genEF(TaintGen.getGeneratedTaintsAt(Curr)); + } + + // Overrides at store instructions + if (const auto *Store = llvm::dyn_cast(Curr)) { + if (CurrNode == Store->getValueOperand()) { + // Store tainted value + + // propagate facts unchanged. User edge-facts are generated from zero. + return EdgeIdentity{}; + } + } + + // Otherwise stick to identity. + return EdgeIdentity{}; +} + +auto IDEFeatureTaintAnalysis::getCallEdgeFunction(n_t CallSite, d_t SrcNode, + f_t /*DestinationFunction*/, + d_t DestNode) + -> EdgeFunction { + if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { + // Generate user edge-facts from zero + return genEF(TaintGen.getGeneratedTaintsAt(CallSite)); + } + + return EdgeIdentity{}; +} + +auto IDEFeatureTaintAnalysis::getReturnEdgeFunction( + n_t CallSite, f_t /*CalleeFunction*/, n_t /*ExitStmt*/, d_t ExitNode, + n_t /*RetSite*/, d_t RetNode) -> EdgeFunction { + if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { + // Generate user edge-facts from zero + return genEF(TaintGen.getGeneratedTaintsAt(CallSite)); + } + + return EdgeIdentity{}; +} + +auto IDEFeatureTaintAnalysis::getCallToRetEdgeFunction( + n_t /*CallSite*/, d_t /*CallNode*/, n_t /*RetSite*/, d_t /*RetSiteNode*/, + llvm::ArrayRef /*Callees*/) -> EdgeFunction { + return EdgeIdentity{}; +} + +auto IDEFeatureTaintAnalysis::initialSeeds() -> InitialSeeds { + InitialSeeds Seeds; + + LLVMBasedCFG CFG; + forallStartingPoints(this->EntryPoints, IRDB, CFG, [this, &Seeds](n_t SP) { + // Set initial seeds at the required entry points and generate the global + // variables using generalized initial seeds + + // Generate zero value at the entry points + Seeds.addSeed(SP, this->getZeroValue(), 0); + // Generate formal parameters of entry points, e.g. main(). Formal + // parameters will otherwise cause trouble by overriding alloca + // instructions without being valid data-flow facts themselves. + + /// TODO: Do we want that? 
+ // for (const auto &Arg : SP->getFunction()->args()) { + // Seeds.addSeed(SP, &Arg, BitVectorSet()); + // } + // Generate all global variables using generalized initial seeds + + for (const auto &G : this->IRDB->getModule()->globals()) { + if (const auto *GV = llvm::dyn_cast(&G)) { + l_t InitialValues = TaintGen.getGeneratedTaintsAt(GV); + if (InitialValues.Taints.any()) { + Seeds.addSeed(SP, GV, std::move(InitialValues)); + } + } + } + }); + + return Seeds; +} + +bool IDEFeatureTaintAnalysis::isZeroValue(d_t FlowFact) const noexcept { + return LLVMZeroValue::isLLVMZeroValue(FlowFact); +} + +void IDEFeatureTaintAnalysis::emitTextReport( + const SolverResults &SR, llvm::raw_ostream &OS) { + OS << "\n====================== IDE-Inst-Interaction-Analysis Report " + "======================\n"; + + for (const auto *F : IRDB->getAllFunctions()) { + auto FunName = F->getName(); + OS << "\nFunction: " << FunName << "\n----------" + << std::string(FunName.size(), '-') << '\n'; + + for (const auto &Inst : llvm::instructions(F)) { + auto Results = SR.resultsAt(&Inst, true); + // stripBottomResults(Results); + if (!Results.empty()) { + OS << "At IR statement: " << NToString(Inst) << '\n'; + for (const auto &Result : Results) { + if (!Result.second.isBottom()) { + OS << " Fact: " << DToString(Result.first) + << "\n Value: " << TaintGen.toString(Result.second) << '\n'; + } + } + OS << '\n'; + } + } + OS << '\n'; + } +} From 4f7e5937be009ba43b405e00115334adc874c924 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 26 Mar 2024 08:59:16 +0100 Subject: [PATCH 02/33] gen from zero in CTR-EF --- .../DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index 3dd3c6aad0..a3872aafaa 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ 
b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -495,8 +495,12 @@ auto IDEFeatureTaintAnalysis::getReturnEdgeFunction( } auto IDEFeatureTaintAnalysis::getCallToRetEdgeFunction( - n_t /*CallSite*/, d_t /*CallNode*/, n_t /*RetSite*/, d_t /*RetSiteNode*/, + n_t CallSite, d_t CallNode, n_t /*RetSite*/, d_t RetSiteNode, llvm::ArrayRef /*Callees*/) -> EdgeFunction { + if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { + // Generate user edge-facts from zero + return genEF(TaintGen.getGeneratedTaintsAt(CallSite)); + } return EdgeIdentity{}; } From e6598b42a4391526de6bc259dce1755444ed78d3 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 26 Mar 2024 11:34:03 +0100 Subject: [PATCH 03/33] Add alloca flow --- .../DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index a3872aafaa..ecfcf64e36 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -38,6 +38,13 @@ auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) -> FlowFunctionPtrType { bool GeneratesFact = TaintGen.isSource(Curr); + if (const auto *Alloca = llvm::dyn_cast(Curr)) { + if (GeneratesFact) { + return generateFromZero(Alloca); + } + return identityFlow(); + } + if (const auto *Load = llvm::dyn_cast(Curr)) { return generateFlowIf( Load, From 3ea6631aaced974fbca3830822e4a92197bac080 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 26 Mar 2024 11:40:03 +0100 Subject: [PATCH 04/33] generate at fallback insts --- .../DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp 
b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index ecfcf64e36..6c2ff9705d 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -81,11 +81,13 @@ auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) } // Fallback - return lambdaFlow([Inst = Curr](d_t Src) { + return lambdaFlow([Inst = Curr, GeneratesFact](d_t Src) { container_type Facts; Facts.insert(Src); if (LLVMZeroValue::isLLVMZeroValue(Src)) { - // keep the zero flow fact + if (GeneratesFact) { + Facts.insert(Inst); + } return Facts; } From 79c46cffbfa3642b89f85f297a24ed359e1cb7ea Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 18 Apr 2024 19:12:07 +0200 Subject: [PATCH 05/33] Tie the EF semi-ring to the problem + integrate into solver --- .../DataFlow/IfdsIde/IDETabulationProblem.h | 2 ++ .../DataFlow/IfdsIde/Solver/IDESolver.h | 21 ++++++------ include/phasar/Utils/SemiRing.h | 34 +++++++++++++++++++ 3 files changed, 47 insertions(+), 10 deletions(-) create mode 100644 include/phasar/Utils/SemiRing.h diff --git a/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h b/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h index 4cdc3610f9..5d3dc71f6d 100644 --- a/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h +++ b/include/phasar/DataFlow/IfdsIde/IDETabulationProblem.h @@ -22,6 +22,7 @@ #include "phasar/Utils/JoinLattice.h" #include "phasar/Utils/NullAnalysisPrinter.h" #include "phasar/Utils/Printer.h" +#include "phasar/Utils/SemiRing.h" #include "phasar/Utils/Soundness.h" #include "llvm/ADT/StringRef.h" @@ -62,6 +63,7 @@ template , public EdgeFunctions, public JoinLattice, + public SemiRing, public AllTopFnProvider { public: using ProblemAnalysisDomain = AnalysisDomainTy; diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h index 3274ced1a8..9f10b10e28 100644 --- 
a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h @@ -406,7 +406,7 @@ class IDESolver PHASAR_LOG_LEVEL(DEBUG, "Compose: " << SumEdgFnE << " * " << f << '\n'); WorkList.emplace_back(PathEdge(d1, ReturnSiteN, std::move(d3)), - f.composeWith(SumEdgFnE)); + IDEProblem.extend(f, SumEdgFnE)); } } } else { @@ -498,15 +498,15 @@ class IDESolver << f4); PHASAR_LOG_LEVEL(DEBUG, " (return * calleeSummary * call)"); - EdgeFunction fPrime = - f4.composeWith(fCalleeSummary).composeWith(f5); + EdgeFunction fPrime = IDEProblem.extend( + IDEProblem.extend(f4, fCalleeSummary), f5); PHASAR_LOG_LEVEL(DEBUG, " = " << fPrime); d_t d5_restoredCtx = restoreContextOnReturnedFact(n, d2, d5); // propagte the effects of the entire call PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f); WorkList.emplace_back( PathEdge(d1, RetSiteN, std::move(d5_restoredCtx)), - f.composeWith(fPrime)); + IDEProblem.extend(f, fPrime)); } } } @@ -538,7 +538,7 @@ class IDESolver .push_back(EdgeFnE); } INC_COUNTER("EF Queries", 1, Full); - auto fPrime = f.composeWith(EdgeFnE); + auto fPrime = IDEProblem.extend(f, EdgeFnE); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f << " = " << fPrime); WorkList.emplace_back(PathEdge(d1, ReturnSiteN, std::move(d3)), @@ -570,7 +570,7 @@ class IDESolver EdgeFunction g = CachedFlowEdgeFunctions.getNormalEdgeFunction(n, d2, nPrime, d3); PHASAR_LOG_LEVEL(DEBUG, "Queried Normal Edge Function: " << g); - EdgeFunction fPrime = f.composeWith(g); + EdgeFunction fPrime = IDEProblem.extend(f, g); if (SolverConfig.emitESG()) { IntermediateEdgeFunctions[std::make_tuple(n, d2, nPrime, d3)] .push_back(g); @@ -950,7 +950,8 @@ class IDESolver PHASAR_LOG_LEVEL(DEBUG, "Compose: " << f5 << " * " << f << " * " << f4); PHASAR_LOG_LEVEL(DEBUG, " (return * function * call)"); - EdgeFunction fPrime = f4.composeWith(f).composeWith(f5); + EdgeFunction fPrime = + IDEProblem.extend(IDEProblem.extend(f4, f), f5); 
PHASAR_LOG_LEVEL(DEBUG, " = " << fPrime); // for each jump function coming into the call, propagate to // return site using the composed function @@ -965,7 +966,7 @@ class IDESolver PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f3); WorkList.emplace_back(PathEdge(std::move(d3), RetSiteC, std::move(d5_restoredCtx)), - f3.composeWith(fPrime)); + IDEProblem.extend(f3, fPrime)); } } } @@ -1004,7 +1005,7 @@ class IDESolver } INC_COUNTER("EF Queries", 1, Full); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << f5 << " * " << f); - propagteUnbalancedReturnFlow(RetSiteC, d5, f.composeWith(f5), + propagteUnbalancedReturnFlow(RetSiteC, d5, IDEProblem.extend(f, f5), Caller); // register for value processing (2nd IDE phase) UnbalancedRetSites.insert(RetSiteC); @@ -1153,7 +1154,7 @@ class IDESolver // was found return AllTop; }(); - EdgeFunction fPrime = JumpFnE.joinWith(f); + EdgeFunction fPrime = IDEProblem.combine(JumpFnE, f); bool NewFunction = fPrime != JumpFnE; IF_LOG_LEVEL_ENABLED(DEBUG, { diff --git a/include/phasar/Utils/SemiRing.h b/include/phasar/Utils/SemiRing.h new file mode 100644 index 0000000000..e4999d329e --- /dev/null +++ b/include/phasar/Utils/SemiRing.h @@ -0,0 +1,34 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_UTILS_SEMIRING_H +#define PHASAR_UTILS_SEMIRING_H + +#include "phasar/DataFlow/IfdsIde/EdgeFunction.h" + +namespace psr { +template class SemiRing { +public: + using l_t = typename AnalysisDomainTy::l_t; + + virtual ~SemiRing() = default; + + virtual EdgeFunction extend(const EdgeFunction &L, + const EdgeFunction &R) { + return L.composeWith(R); + } + + virtual EdgeFunction combine(const EdgeFunction &L, + const EdgeFunction &R) { + return L.joinWith(R); + } +}; +} // namespace psr + +#endif // PHASAR_UTILS_SEMIRING_H From dd68906a44bd8e2a4f5f88ad2fabbd03b7dc0896 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Thu, 18 Apr 2024 19:45:08 +0200 Subject: [PATCH 06/33] Incorporate combine and extend into IIA --- .../DataFlow/IfdsIde/EdgeFunctionUtils.h | 36 ++++ .../Problems/IDEInstInteractionAnalysis.h | 170 ++++++++---------- 2 files changed, 108 insertions(+), 98 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h b/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h index de9db609a6..7c8a7ebcba 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h @@ -132,6 +132,22 @@ defaultComposeOrNull(EdgeFunctionRef This, return nullptr; } +template +EdgeFunction +defaultComposeOrNull(const EdgeFunction &This, + const EdgeFunction &SecondFunction) noexcept { + if (llvm::isa>(SecondFunction)) { + return This; + } + if (SecondFunction.isConstant() || llvm::isa>(This)) { + return SecondFunction; + } + if (llvm::isa>(This)) { + return This; + } + return nullptr; +} + template struct ConstantEdgeFunction { using l_t = L; using JLattice = JoinLatticeTraits; @@ -409,6 +425,26 @@ EdgeFunction defaultJoinOrNull(EdgeFunctionRef This, return nullptr; } +template +EdgeFunction defaultJoinOrNull(const EdgeFunction &This, + const EdgeFunction 
&OtherFunction) { + if (llvm::isa>(OtherFunction) || llvm::isa>(This)) { + return OtherFunction; + } + if (llvm::isa>(OtherFunction) || OtherFunction == This || + llvm::isa>(This)) { + return This; + } + if (llvm::isa>(OtherFunction)) { + if constexpr (N > 0) { + return JoinEdgeFunction::create(This, OtherFunction); + } else if constexpr (HasJoinLatticeTraits) { + return AllBottom{}; + } + } + return nullptr; +} + template EdgeFunction EdgeIdentity::join(EdgeFunctionRef This, const EdgeFunction &OtherFunction) { diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h index 40d1dff446..fb70d9d964 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h @@ -11,6 +11,7 @@ #define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEINSTINTERACTIONANALYSIS_H #include "phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h" +#include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" #include "phasar/DataFlow/IfdsIde/FlowFunctions.h" #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" @@ -949,59 +950,15 @@ class IDEInstInteractionAnalysisT l_t computeTarget(ByConstRef /* Src */) const { return Replacement; } - static EdgeFunction compose(EdgeFunctionRef This, - const EdgeFunction SecondFunction) { - - if (auto Default = defaultComposeOrNull(This, SecondFunction)) { - return Default; - } - - auto Cache = This.getCacheOrNull(); - assert(Cache != nullptr && "We expect a cache, because " - "IIAAKillOrReplaceEF is too large for SOO"); - - if (auto *AD = llvm::dyn_cast(SecondFunction)) { - auto Union = - IDEInstInteractionAnalysisT::joinImpl(This->Replacement, AD->Data); - return Cache->createEdgeFunction(std::move(Union)); - } - - if (auto *KR = llvm::dyn_cast(SecondFunction)) { - 
return SecondFunction; - } - llvm::report_fatal_error( - "found unexpected edge function in 'IIAAKillOrReplaceEF'"); + static EdgeFunction + compose(EdgeFunctionRef /*This*/, + const EdgeFunction /*SecondFunction*/) { + llvm::report_fatal_error("Implemented in 'extend'"); } - static EdgeFunction join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) { - /// XXX: Here, we underapproximate joins with EdgeIdentity - if (llvm::isa>(OtherFunction)) { - return This; - } - - if (auto Default = defaultJoinOrNull(This, OtherFunction)) { - return Default; - } - - auto Cache = This.getCacheOrNull(); - assert(Cache != nullptr && "We expect a cache, because " - "IIAAKillOrReplaceEF is too large for SOO"); - - if (auto *AD = llvm::dyn_cast(OtherFunction)) { - auto ADCache = OtherFunction.template getCacheOrNull(); - assert(ADCache); - auto Union = - IDEInstInteractionAnalysisT::joinImpl(This->Replacement, AD->Data); - return ADCache->createEdgeFunction(std::move(Union)); - } - if (auto *KR = llvm::dyn_cast(OtherFunction)) { - auto Union = IDEInstInteractionAnalysisT::joinImpl(This->Replacement, - KR->Replacement); - return Cache->createEdgeFunction(std::move(Union)); - } - llvm::report_fatal_error( - "found unexpected edge function in 'IIAAKillOrReplaceEF'"); + static EdgeFunction join(EdgeFunctionRef /*This*/, + const EdgeFunction & /*OtherFunction*/) { + llvm::report_fatal_error("Implemented in 'combine'"); } bool operator==(const IIAAKillOrReplaceEF &Other) const noexcept { @@ -1044,55 +1001,15 @@ class IDEInstInteractionAnalysisT return IDEInstInteractionAnalysisT::joinImpl(Src, Data); } - static EdgeFunction compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction) { - if (auto Default = defaultComposeOrNull(This, SecondFunction)) { - return Default; - } - - auto Cache = This.getCacheOrNull(); - assert(Cache != nullptr && "We expect a cache, because " - "IIAAAddLabelsEF is too large for SOO"); - - if (auto *AD = llvm::dyn_cast(SecondFunction)) { - auto 
Union = - IDEInstInteractionAnalysisT::joinImpl(This->Data, AD->Data); - return Cache->createEdgeFunction(std::move(Union)); - } - if (auto *KR = llvm::dyn_cast(SecondFunction)) { - return SecondFunction; - } - llvm::report_fatal_error( - "found unexpected edge function in 'IIAAAddLabelsEF'"); + static EdgeFunction + compose(EdgeFunctionRef /*This*/, + const EdgeFunction & /*SecondFunction*/) { + llvm::report_fatal_error("Implemented in 'extend'"); } - static EdgeFunction join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) { - /// XXX: Here, we underapproximate joins with EdgeIdentity - if (llvm::isa>(OtherFunction)) { - return This; - } - - if (auto Default = defaultJoinOrNull(This, OtherFunction)) { - return Default; - } - - auto Cache = This.getCacheOrNull(); - assert(Cache != nullptr && "We expect a cache, because " - "IIAAAddLabelsEF is too large for SOO"); - - if (auto *AD = llvm::dyn_cast(OtherFunction)) { - auto Union = - IDEInstInteractionAnalysisT::joinImpl(This->Data, AD->Data); - return Cache->createEdgeFunction(std::move(Union)); - } - if (auto *KR = llvm::dyn_cast(OtherFunction)) { - auto Union = - IDEInstInteractionAnalysisT::joinImpl(This->Data, KR->Replacement); - return Cache->createEdgeFunction(std::move(Union)); - } - llvm::report_fatal_error( - "found unexpected edge function in 'IIAAAddLabelsEF'"); + static EdgeFunction join(EdgeFunctionRef /*This*/, + const EdgeFunction & /*OtherFunction*/) { + llvm::report_fatal_error("Implemented in 'combine'"); } bool operator==(const IIAAAddLabelsEF &Other) const noexcept { @@ -1112,6 +1029,63 @@ class IDEInstInteractionAnalysisT } }; + const auto &getData(const EdgeFunction &EF) { + if (const auto *AddLabels = llvm::dyn_cast(EF)) { + return AddLabels->Data; + } + if (const auto *KillOrReplace = llvm::dyn_cast(EF)) { + return KillOrReplace->Replacement; + } + llvm::report_fatal_error( + "found unexpected first edge function in 'getData'"); + } + + EdgeFunction extend(const EdgeFunction 
&FirstFunction, + const EdgeFunction &SecondFunction) override { + if (auto Default = defaultComposeOrNull(FirstFunction, SecondFunction)) { + return Default; + } + + const auto &ThisData = getData(FirstFunction); + + if (auto *AD = llvm::dyn_cast(SecondFunction)) { + auto Union = IDEInstInteractionAnalysisT::joinImpl(ThisData, AD->Data); + return llvm::isa(FirstFunction) + ? IIAAAddLabelsEFCache.createEdgeFunction(std::move(Union)) + : IIAAKillOrReplaceEFCache.createEdgeFunction( + std::move(Union)); + } + + llvm::report_fatal_error( + "found unexpected second edge function in 'extend'"); + } + + EdgeFunction combine(const EdgeFunction &FirstFunction, + const EdgeFunction &OtherFunction) override { + /// XXX: Here, we underapproximate joins with EdgeIdentity + if (llvm::isa>(FirstFunction)) { + return OtherFunction; + } + if (llvm::isa>(OtherFunction)) { + return FirstFunction; + } + + if (auto Default = defaultJoinOrNull(FirstFunction, OtherFunction)) { + return Default; + } + + const auto &ThisData = getData(FirstFunction); + const auto &OtherData = getData(OtherFunction); + auto Union = IDEInstInteractionAnalysisT::joinImpl(ThisData, OtherData); + + if (llvm::isa(FirstFunction)) { + if (llvm::isa(OtherFunction)) { + return IIAAKillOrReplaceEFCache.createEdgeFunction(std::move(Union)); + } + } + return IIAAAddLabelsEFCache.createEdgeFunction(std::move(Union)); + } + // Provide functionalities for printing things and emitting text reports. 
static void stripBottomResults(std::unordered_map &Res) { From ad25ac4c9608cfc1ba3af151ea9db64220b618e0 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sat, 20 Apr 2024 14:19:32 +0200 Subject: [PATCH 07/33] minor --- .../IfdsIde/Problems/IDEInstInteractionAnalysis.h | 9 ++++----- include/phasar/Utils/BitVectorSet.h | 10 +++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h index fb70d9d964..43060beccf 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h @@ -1078,10 +1078,9 @@ class IDEInstInteractionAnalysisT const auto &OtherData = getData(OtherFunction); auto Union = IDEInstInteractionAnalysisT::joinImpl(ThisData, OtherData); - if (llvm::isa(FirstFunction)) { - if (llvm::isa(OtherFunction)) { - return IIAAKillOrReplaceEFCache.createEdgeFunction(std::move(Union)); - } + if (llvm::isa(FirstFunction) && + llvm::isa(OtherFunction)) { + return IIAAKillOrReplaceEFCache.createEdgeFunction(std::move(Union)); } return IIAAAddLabelsEFCache.createEdgeFunction(std::move(Union)); } @@ -1147,7 +1146,7 @@ class IDEInstInteractionAnalysisT } if (const auto *H = llvm::dyn_cast(I)) { if (!H->isIndirectCall() && H->getCalledFunction() && - this->ICF->isHeapAllocatingFunction(H->getCalledFunction())) { + psr::isHeapAllocatingFunction(H->getCalledFunction())) { Variables.insert(H); } } diff --git a/include/phasar/Utils/BitVectorSet.h b/include/phasar/Utils/BitVectorSet.h index 418d90e603..4f7176a46a 100644 --- a/include/phasar/Utils/BitVectorSet.h +++ b/include/phasar/Utils/BitVectorSet.h @@ -177,11 +177,11 @@ template class BitVectorSet { } [[nodiscard]] BitVectorSet setUnion(const BitVectorSet &Other) const { - size_t MaxSize = std::max(Bits.size(), Other.Bits.size()); - 
BitVectorSet Res; - Res.Bits.reserve(MaxSize); - Res.Bits = Bits; - Res.Bits |= Other.Bits; + BitVectorSet Res = Bits.size() < Other.Bits.size() ? Other : *this; + const BitVectorSet &Smaller = + Bits.size() < Other.Bits.size() ? *this : Other; + + Res.Bits |= Smaller.Bits; return Res; } From ed5ca475d675aefed5fe9efc2978794835e63981 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 22 Apr 2024 20:50:43 +0200 Subject: [PATCH 08/33] Make the FTaint Analysis work on the iia tests (not all pass, though) --- .../DefaultEdgeFunctionSingletonCache.h | 47 +- .../IfdsIde/EdgeFunctionSingletonCache.h | 4 +- .../phasar/DataFlow/IfdsIde/InitialSeeds.h | 30 +- .../Problems/IDEFeatureTaintAnalysis.h | 61 ++- .../Problems/IDEInstInteractionAnalysis.h | 3 +- include/phasar/Utils/BitVectorSet.h | 23 +- .../Problems/IDEFeatureTaintAnalysis.cpp | 399 +++++++++++------ .../DataFlow/IfdsIde/Problems/CMakeLists.txt | 1 + .../Problems/IDEFeatureTaintAnalysisTest.cpp | 411 ++++++++++++++++++ 9 files changed, 780 insertions(+), 199 deletions(-) create mode 100644 unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp diff --git a/include/phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h b/include/phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h index 2d05cb54c4..e78af20707 100644 --- a/include/phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h +++ b/include/phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h @@ -22,25 +22,25 @@ namespace psr { /// hash_value(const EdgeFunctionTy&). /// /// This cache is *not* thread-safe. 
-template -class DefaultEdgeFunctionSingletonCache +template +class DefaultEdgeFunctionSingletonCacheImpl : public EdgeFunctionSingletonCache { public: - DefaultEdgeFunctionSingletonCache() noexcept = default; + DefaultEdgeFunctionSingletonCacheImpl() noexcept = default; - DefaultEdgeFunctionSingletonCache(const DefaultEdgeFunctionSingletonCache &) = - delete; - DefaultEdgeFunctionSingletonCache & - operator=(const DefaultEdgeFunctionSingletonCache &) = delete; + DefaultEdgeFunctionSingletonCacheImpl( + const DefaultEdgeFunctionSingletonCacheImpl &) = delete; + DefaultEdgeFunctionSingletonCacheImpl & + operator=(const DefaultEdgeFunctionSingletonCacheImpl &) = delete; - DefaultEdgeFunctionSingletonCache( - DefaultEdgeFunctionSingletonCache &&) noexcept = default; - DefaultEdgeFunctionSingletonCache & - operator=(DefaultEdgeFunctionSingletonCache &&) noexcept = delete; - ~DefaultEdgeFunctionSingletonCache() override = default; + DefaultEdgeFunctionSingletonCacheImpl( + DefaultEdgeFunctionSingletonCacheImpl &&) noexcept = default; + DefaultEdgeFunctionSingletonCacheImpl & + operator=(DefaultEdgeFunctionSingletonCacheImpl &&) noexcept = delete; + ~DefaultEdgeFunctionSingletonCacheImpl() override = default; [[nodiscard]] const void * - lookup(ByConstRef EF) const noexcept override { + lookup(const EdgeFunctionTy &EF) const noexcept override { return Cache.lookup(&EF); } @@ -50,13 +50,10 @@ class DefaultEdgeFunctionSingletonCache assert(Inserted); } - void erase(ByConstRef EF) noexcept override { - Cache.erase(&EF); - } + void erase(const EdgeFunctionTy &EF) noexcept override { Cache.erase(&EF); } template - [[nodiscard]] EdgeFunction - createEdgeFunction(ArgTys &&...Args) { + [[nodiscard]] EdgeFunction createEdgeFunction(ArgTys &&...Args) { return CachedEdgeFunction{ EdgeFunctionTy{std::forward(Args)...}, this}; } @@ -92,19 +89,29 @@ class DefaultEdgeFunctionSingletonCache llvm::DenseMap Cache; }; +template +class DefaultEdgeFunctionSingletonCache + : public 
DefaultEdgeFunctionSingletonCacheImpl< + EdgeFunctionTy, typename EdgeFunctionTy::l_t> { +public: + using DefaultEdgeFunctionSingletonCacheImpl< + EdgeFunctionTy, + typename EdgeFunctionTy::l_t>::DefaultEdgeFunctionSingletonCacheImpl; +}; + template class DefaultEdgeFunctionSingletonCache< EdgeFunctionTy, std::enable_if_t>> { public: [[nodiscard]] const void * - lookup(ByConstRef /*EF*/) const noexcept override { + lookup(const EdgeFunctionTy & /*EF*/) const noexcept override { return nullptr; } void insert(const EdgeFunctionTy * /*EF*/, const void * /*Mem*/) override { assert(false && "We should never go here"); } - void erase(ByConstRef /*EF*/) noexcept override { + void erase(const EdgeFunctionTy & /*EF*/) noexcept override { assert(false && "We should never go here"); } [[nodiscard]] EdgeFunction diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunctionSingletonCache.h b/include/phasar/DataFlow/IfdsIde/EdgeFunctionSingletonCache.h index 53e652c0fd..8b6c096633 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunctionSingletonCache.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunctionSingletonCache.h @@ -46,7 +46,7 @@ template class EdgeFunctionSingletonCache { /// Checks whether the edge function EF is cached in this cache. Returns the /// cached entry if found, else nullptr. [[nodiscard]] virtual const void * - lookup(ByConstRef EF) const noexcept = 0; + lookup(const EdgeFunctionTy &EF) const noexcept = 0; /// Inserts the cache-entry Mem for the edge function *EF into the cache. /// Typically, EF points into the buffer pointed to by Mem. Both pointers are @@ -57,7 +57,7 @@ template class EdgeFunctionSingletonCache { /// Erases the cache-entry associated with the edge function EF from the /// cache. 
- virtual void erase(ByConstRef EF) noexcept = 0; + virtual void erase(const EdgeFunctionTy &EF) noexcept = 0; template [[nodiscard]] auto createEdgeFunction(ArgTys &&...Args) { diff --git a/include/phasar/DataFlow/IfdsIde/InitialSeeds.h b/include/phasar/DataFlow/IfdsIde/InitialSeeds.h index 60e5ae23e3..db7ea654e7 100644 --- a/include/phasar/DataFlow/IfdsIde/InitialSeeds.h +++ b/include/phasar/DataFlow/IfdsIde/InitialSeeds.h @@ -11,6 +11,7 @@ #define PHASAR_DATAFLOW_IFDSIDE_INITIALSEEDS_H #include "phasar/Domain/BinaryDomain.h" +#include "phasar/Utils/Printer.h" #include "phasar/Utils/TypeTraits.h" #include "llvm/Support/Compiler.h" @@ -75,36 +76,13 @@ template class InitialSeeds { [[nodiscard]] GeneralizedSeeds getSeeds() && { return std::move(Seeds); } void dump(llvm::raw_ostream &OS = llvm::errs()) const { - - auto printNode = [&](auto &&Node) { // NOLINT - if constexpr (std::is_pointer_v && - is_llvm_printable_v>) { - OS << *Node; - } else { - OS << Node; - } - }; - - auto printFact = [&](auto &&Node) { // NOLINT - if constexpr (std::is_pointer_v && - is_llvm_printable_v>) { - OS << *Node; - } else { - OS << Node; - } - }; - OS << "======================== Initial Seeds ========================\n"; for (const auto &[Node, Facts] : Seeds) { - OS << "At "; - printNode(Node); - OS << "\n"; + OS << "At " << NToString(Node) << '\n'; for (const auto &[Fact, Value] : Facts) { - OS << "> "; - printFact(Fact); - OS << " --> \\." << Value << "\n"; + OS << "> " << DToString(Fact) << " --> \\." 
<< LToString(Value) << '\n'; } - OS << "\n"; + OS << '\n'; } OS << "========================== End Seeds ==========================\n"; } diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h index b07c745dd0..d2cd372846 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h @@ -10,12 +10,15 @@ #ifndef PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEFEATURETAINTANALYSIS_H #define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEFEATURETAINTANALYSIS_H +#include "phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h" +#include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" #include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" #include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/Utils/BitVectorSet.h" #include "phasar/Utils/JoinLattice.h" +#include "phasar/Utils/Printer.h" #include "phasar/Utils/TypeTraits.h" #include "llvm/ADT/FunctionExtras.h" @@ -36,7 +39,7 @@ namespace psr { class LLVMProjectIRDB; struct IDEFeatureTaintEdgeFact { - llvm::SmallBitVector Taints{}; + llvm::SmallBitVector Taints; static llvm::SmallBitVector fromBits(uintptr_t Bits) { #if __has_builtin(__builtin_constant_p) @@ -56,6 +59,7 @@ struct IDEFeatureTaintEdgeFact { : Taints(Taints) {} IDEFeatureTaintEdgeFact(uintptr_t Taints) noexcept : Taints(fromBits(Taints)) {} + explicit IDEFeatureTaintEdgeFact() noexcept { Taints.invalid(); } void unionWith(uintptr_t Facts) { auto RequiredSize = llvm::findLastSet(Facts) + 1; @@ -68,9 +72,9 @@ struct IDEFeatureTaintEdgeFact { Taints |= Facts.Taints; } - [[nodiscard]] inline bool isBottom() const noexcept { - // TODO - return false; + [[nodiscard]] inline bool isBottom() const noexcept { 
return Taints.empty(); } + [[nodiscard]] inline bool isTop() const noexcept { + return Taints.isInvalid(); } friend llvm::hash_code @@ -114,20 +118,44 @@ struct IDEFeatureTaintEdgeFact { [](auto Word) { return Word == 0; }); } - template [[nodiscard]] std::string str() { + template [[nodiscard]] std::string str() const { auto BV = BitVectorSet::fromBits(Taints); return LToString(BV); } + + template [[nodiscard]] auto toBVSet() const { + return BitVectorSet::fromBits(Taints); + } + + template [[nodiscard]] auto toSet() const { + std::set Ret; + + for (const auto &Elem : this->template toBVSet()) { + Ret.insert(Elem); + } + return Ret; + } }; +std::string LToString(const IDEFeatureTaintEdgeFact &EdgeFact); + template <> struct JoinLatticeTraits { - inline static IDEFeatureTaintEdgeFact top() { return 0; } + inline static IDEFeatureTaintEdgeFact top() { + IDEFeatureTaintEdgeFact Ret{}; + return Ret; + } inline static IDEFeatureTaintEdgeFact bottom() { // TODO return 0; } inline static IDEFeatureTaintEdgeFact join(const IDEFeatureTaintEdgeFact &L, const IDEFeatureTaintEdgeFact &R) { + if (L.isTop()) { + return R; + } + if (R.isTop()) { + return L; + } auto Ret = L; Ret.Taints |= R.Taints; return Ret; @@ -153,9 +181,13 @@ class FeatureTaintGenerator { static GenerateTaintsFn createGenerateTaints(EdgeFactGenerator &&EFGen) { return [EFGen{std::forward(EFGen)}](InstOrGlobal IG) { const auto &TaintSet = std::invoke(EFGen, IG); - BitVectorSet> BV( + BitVectorSet, llvm::SmallBitVector> BV( llvm::adl_begin(TaintSet), llvm::adl_end(TaintSet)); - return IDEFeatureTaintEdgeFact{std::move(BV).getBits()}; + + auto Ret = IDEFeatureTaintEdgeFact{std::move(BV).getBits()}; + + llvm::errs() << "generateTaints: " << LToString(Ret) << '\n'; + return Ret; }; } @@ -240,6 +272,9 @@ class IDEFeatureTaintAnalysis FeatureTaintGenerator(std::forward(SrcDetector), std::forward(EFGen))) {} + // The EF Caches are incomplete, so move the dtor into the .cpp + ~IDEFeatureTaintAnalysis(); + 
////////////////////////////////////////////////////////////////////////////// /// Flow Functions ////////////////////////////////////////////////////////////////////////////// @@ -290,9 +325,19 @@ class IDEFeatureTaintAnalysis void emitTextReport(const SolverResults &SR, llvm::raw_ostream &OS = llvm::outs()) override; + EdgeFunction extend(const EdgeFunction &FirstEF, + const EdgeFunction &SecondEF) override; + EdgeFunction combine(const EdgeFunction &FirstEF, + const EdgeFunction &OtherEF) override; + private: FeatureTaintGenerator TaintGen; LLVMAliasInfoRef PT; + + struct GenerateEF; + struct AddFactsEF; + DefaultEdgeFunctionSingletonCacheImpl GenEFCache; + DefaultEdgeFunctionSingletonCacheImpl AddEFCache; }; } // namespace psr diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h index 43060beccf..4c414bfbd9 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h @@ -1066,7 +1066,8 @@ class IDEInstInteractionAnalysisT if (llvm::isa>(FirstFunction)) { return OtherFunction; } - if (llvm::isa>(OtherFunction)) { + if (llvm::isa>(OtherFunction) && + !llvm::isa>(FirstFunction)) { return FirstFunction; } diff --git a/include/phasar/Utils/BitVectorSet.h b/include/phasar/Utils/BitVectorSet.h index f53f4643f6..17fe8e41e0 100644 --- a/include/phasar/Utils/BitVectorSet.h +++ b/include/phasar/Utils/BitVectorSet.h @@ -12,6 +12,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" @@ -20,16 +21,18 @@ #include #include +#include #include namespace psr { namespace internal { + inline bool isLess(const llvm::BitVector &Lhs, const llvm::BitVector &Rhs) { unsigned LhsBits = Lhs.size(); unsigned RhsBits = Rhs.size(); 
if (LhsBits > RhsBits) { - if (Lhs.find_first_in(RhsBits, LhsBits) != -1) { + if (Lhs.find_next(RhsBits) != -1) { return false; } } else if (LhsBits < RhsBits) { @@ -47,6 +50,15 @@ inline bool isLess(const llvm::BitVector &Lhs, const llvm::BitVector &Rhs) { } return false; } + +inline llvm::ArrayRef getWords(const llvm::BitVector &BV, + uintptr_t & /*Store*/) { + return BV.getData(); +} +inline llvm::ArrayRef getWords(const llvm::SmallBitVector &BV, + uintptr_t &Store) { + return BV.getData(Store); +} } // namespace internal /** @@ -292,8 +304,10 @@ class BitVectorSet { } // Check, whether Lhs and Rhs actually have the same bits set and not // whether their internal representation is exactly identitcal - auto LhsWords = Lhs.Bits.getData(); - auto RhsWords = Rhs.Bits.getData(); + + uintptr_t LStore{}, RStore{}; + auto LhsWords = internal::getWords(Lhs.Bits, LStore); + auto RhsWords = internal::getWords(Rhs.Bits, RStore); if (LhsWords.size() == RhsWords.size()) { return LhsWords == RhsWords; } @@ -320,7 +334,8 @@ class BitVectorSet { if (BV.Bits.empty()) { return {}; } - auto Words = BV.Bits.getData(); + uintptr_t Store{}; + auto Words = internal::getWords(BV.Bits, Store); size_t Idx = Words.size(); while (Idx && Words[Idx - 1] == 0) { --Idx; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index 6c2ff9705d..cd5091abf4 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -14,12 +14,15 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include +#include #include using namespace psr; @@ -34,10 +37,23 @@ 
IDEFeatureTaintAnalysis::IDEFeatureTaintAnalysis( IRDB, std::move(EntryPoints), LLVMZeroValue::getInstance()), TaintGen(std::move(TaintGen)), PT(PT) {} +IDEFeatureTaintAnalysis::~IDEFeatureTaintAnalysis() = default; + +std::string psr::LToString(const IDEFeatureTaintEdgeFact &EdgeFact) { + std::string Ret; + llvm::raw_string_ostream ROS(Ret); + ROS << '<'; + llvm::interleaveComma(EdgeFact.Taints.set_bits(), ROS); + ROS << '>'; + return Ret; +} + auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) -> FlowFunctionPtrType { bool GeneratesFact = TaintGen.isSource(Curr); + llvm::errs() << "[getNormalFlowFunction]: " << llvmIRToString(Curr) << '\n'; + if (const auto *Alloca = llvm::dyn_cast(Curr)) { if (GeneratesFact) { return generateFromZero(Alloca); @@ -46,11 +62,19 @@ auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) } if (const auto *Load = llvm::dyn_cast(Curr)) { - return generateFlowIf( - Load, - [PointerOp = Load->getPointerOperand(), + return lambdaFlow( + [GeneratesFact, Load, PointerOp = Load->getPointerOperand(), PTS = PT.getReachableAllocationSites(Load->getPointerOperand(), true)]( - d_t Src) { return Src == PointerOp || PTS->count(Src); }); + d_t Source) -> container_type { + bool GenFromZero = + GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Source); + + if (GenFromZero || Source == PointerOp || PTS->count(Source)) { + return {Source, Load}; + } + + return {Source}; + }); } if (const auto *Store = llvm::dyn_cast(Curr)) { @@ -144,6 +168,9 @@ auto IDEFeatureTaintAnalysis::getRetFlowFunction(n_t CallSite, return mapFactsToCaller( llvm::cast(CallSite), ExitInst, {}, [GeneratesFact](const llvm::Value *RetVal, d_t Src) { + // ASK: Should all parameters be mapped back? Or just pointer params? + // ASK: IDEInstInteractionAnalysisTest.HandleCallTest_01: Should 14 + // (%call) hold for 'k' at ret? 
if (Src == RetVal) { return true; } @@ -170,19 +197,20 @@ auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( auto Mapper = mapFactsAlongsideCallSite( llvm::cast(CallSite), - [](d_t Arg) { return !Arg->getType()->isPointerTy(); }, + [](d_t Arg) { + return !Arg->getType()->isPointerTy(); + // return llvm::isa(Arg); + }, /*PropagateGlobals*/ false); if (GeneratesFact) { - unionFlows(std::move(Mapper), - generateFlowAndKillAllOthers(CallSite, getZeroValue())); + return unionFlows(std::move(Mapper), + generateFlowAndKillAllOthers(CallSite, getZeroValue())); } return Mapper; } -namespace { - -struct AddFactsEF { +struct IDEFeatureTaintAnalysis::AddFactsEF { using l_t = IDEFeatureTaintAnalysisDomain::l_t; IDEFeatureTaintEdgeFact Facts; @@ -193,10 +221,14 @@ struct AddFactsEF { } static EdgeFunction compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction); + const EdgeFunction &SecondFunction) { + llvm::report_fatal_error("Implemented in 'extend'"); + } static EdgeFunction join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction); + const EdgeFunction &OtherFunction) { + llvm::report_fatal_error("Implemented in 'combine'"); + } friend bool operator==(const AddFactsEF &L, const AddFactsEF &R) { return L.Facts == R.Facts; @@ -208,7 +240,7 @@ struct AddFactsEF { } }; -struct GenerateEF { +struct IDEFeatureTaintAnalysis::GenerateEF { using l_t = IDEFeatureTaintAnalysisDomain::l_t; IDEFeatureTaintEdgeFact Facts; @@ -220,10 +252,14 @@ struct GenerateEF { } static EdgeFunction compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction); + const EdgeFunction &SecondFunction) { + llvm::report_fatal_error("Implemented in 'extend'"); + } static EdgeFunction join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction); + const EdgeFunction &OtherFunction) { + llvm::report_fatal_error("Implemented in 'combine'"); + } friend bool operator==(const GenerateEF &L, const GenerateEF &R) { return L.Facts == R.Facts; @@ -235,6 +271,7 @@ struct GenerateEF { } }; 
+namespace { struct AddSmallFactsEF { using l_t = IDEFeatureTaintAnalysisDomain::l_t; @@ -246,10 +283,14 @@ struct AddSmallFactsEF { } static EdgeFunction compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction); + const EdgeFunction &SecondFunction) { + llvm::report_fatal_error("Implemented in 'extend'"); + } static EdgeFunction join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction); + const EdgeFunction &OtherFunction) { + llvm::report_fatal_error("Implemented in 'combine'"); + } friend bool operator==(const AddSmallFactsEF &L, const AddSmallFactsEF &R) { return L.Facts == R.Facts; @@ -273,10 +314,14 @@ struct GenerateSmallEF { } static EdgeFunction compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction); + const EdgeFunction &SecondFunction) { + llvm::report_fatal_error("Implemented in 'extend'"); + } static EdgeFunction join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction); + const EdgeFunction &OtherFunction) { + llvm::report_fatal_error("Implemented in 'combine'"); + } // NOLINTNEXTLINE(readability-identifier-naming) -- needed for ADL friend llvm::hash_code hash_value(const GenerateSmallEF &EF) { @@ -286,171 +331,242 @@ struct GenerateSmallEF { friend bool operator==(GenerateSmallEF L, GenerateSmallEF R) { return L.Facts == R.Facts; } -}; -auto GenerateSmallEF::compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction) - -> EdgeFunction { - if (auto Default = defaultComposeOrNull(This, SecondFunction)) { - return Default; + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + GenerateSmallEF EF) { + return OS << "GenerateSmallEF" << LToString(EF.computeTarget(0)); } +}; - auto Val = SecondFunction.computeTarget(This->computeTarget(0)); +// auto GenerateSmallEF::compose(EdgeFunctionRef This, +// const EdgeFunction &SecondFunction) +// -> EdgeFunction { +// if (auto Default = defaultComposeOrNull(This, SecondFunction)) { +// return Default; +// } - if (Val.Taints.isSmall()) { - uintptr_t Buf{}; - 
std::ignore = Val.Taints.getData(Buf); - return GenerateSmallEF{Buf}; - } +// auto Val = SecondFunction.computeTarget(This->computeTarget(0)); - // TODO: Caching +// if (Val.Taints.isSmall()) { +// uintptr_t Buf{}; +// std::ignore = Val.Taints.getData(Buf); +// return GenerateSmallEF{Buf}; +// } - return GenerateEF{std::move(Val)}; -} +// // TODO: Caching -auto AddSmallFactsEF::compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction) - -> EdgeFunction { - if (auto Default = defaultComposeOrNull(This, SecondFunction)) { - return Default; - } +// return GenerateEF{std::move(Val)}; +// } - auto Val = SecondFunction.computeTarget(This->computeTarget(0)); +// auto AddSmallFactsEF::compose(EdgeFunctionRef This, +// const EdgeFunction &SecondFunction) +// -> EdgeFunction { +// if (auto Default = defaultComposeOrNull(This, SecondFunction)) { +// return Default; +// } - if (Val.Taints.isSmall()) { - uintptr_t Buf{}; - std::ignore = Val.Taints.getData(Buf); - return AddSmallFactsEF{Buf}; - } +// auto Val = SecondFunction.computeTarget(This->computeTarget(0)); - // TODO: Caching +// if (Val.Taints.isSmall()) { +// uintptr_t Buf{}; +// std::ignore = Val.Taints.getData(Buf); +// return AddSmallFactsEF{Buf}; +// } - return AddFactsEF{std::move(Val)}; -} +// // TODO: Caching -auto GenerateEF::compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction) - -> EdgeFunction { - if (auto Default = defaultComposeOrNull(This, SecondFunction)) { - return Default; - } +// return AddFactsEF{std::move(Val)}; +// } - auto Val = SecondFunction.computeTarget(This->computeTarget(0)); +// auto GenerateEF::compose(EdgeFunctionRef This, +// const EdgeFunction &SecondFunction) +// -> EdgeFunction { +// if (auto Default = defaultComposeOrNull(This, SecondFunction)) { +// return Default; +// } + +// auto Val = SecondFunction.computeTarget(This->computeTarget(0)); - // TODO: Caching +// // TODO: Caching - return GenerateEF{std::move(Val)}; -} +// return GenerateEF{std::move(Val)}; 
+// } -auto AddFactsEF::compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction) - -> EdgeFunction { - if (auto Default = defaultComposeOrNull(This, SecondFunction)) { - return Default; - } +// auto AddFactsEF::compose(EdgeFunctionRef This, +// const EdgeFunction &SecondFunction) +// -> EdgeFunction { +// if (auto Default = defaultComposeOrNull(This, SecondFunction)) { +// return Default; +// } - auto Val = SecondFunction.computeTarget(This->computeTarget(0)); +// auto Val = SecondFunction.computeTarget(This->computeTarget(0)); - // TODO: Caching +// // TODO: Caching - return AddFactsEF{std::move(Val)}; -} +// return AddFactsEF{std::move(Val)}; +// } -template -EdgeFunction joinWithGen(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) { - if (auto Default = defaultJoinOrNull(This, OtherFunction)) { - return Default; - } +// template +// EdgeFunction joinWithGen(EdgeFunctionRef This, +// const EdgeFunction &OtherFunction) { +// if (auto Default = defaultJoinOrNull(This, OtherFunction)) { +// return Default; +// } - auto OtherFacts = OtherFunction.computeTarget(0); - OtherFacts.unionWith(This->Facts); +// auto OtherFacts = OtherFunction.computeTarget(0); +// OtherFacts.unionWith(This->Facts); - if (OtherFacts.Taints.isSmall()) { - uintptr_t Buf{}; - std::ignore = OtherFacts.Taints.getData(Buf); +// if (OtherFacts.Taints.isSmall()) { +// uintptr_t Buf{}; +// std::ignore = OtherFacts.Taints.getData(Buf); - if (OtherFunction.isConstant()) { - return GenerateSmallEF{Buf}; - } +// if (OtherFunction.isConstant()) { +// return GenerateSmallEF{Buf}; +// } - return AddSmallFactsEF{Buf}; - } +// return AddSmallFactsEF{Buf}; +// } - // TODO: Caching +// // TODO: Caching - if (OtherFunction.isConstant()) { - return GenerateEF{std::move(OtherFacts)}; - } +// if (OtherFunction.isConstant()) { +// return GenerateEF{std::move(OtherFacts)}; +// } - return AddFactsEF{std::move(OtherFacts)}; -} +// return AddFactsEF{std::move(OtherFacts)}; +// } -template 
-EdgeFunction joinWithAdd(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) { - /// XXX: Here, we underapproximate joins with EdgeIdentity - if (llvm::isa>(OtherFunction)) { - return This; - } +// template +// EdgeFunction joinWithAdd(EdgeFunctionRef This, +// const EdgeFunction &OtherFunction) { +// /// XXX: Here, we underapproximate joins with EdgeIdentity +// if (llvm::isa>(OtherFunction)) { +// return This; +// } - if (auto Default = defaultJoinOrNull(This, OtherFunction)) { - return Default; - } +// if (auto Default = defaultJoinOrNull(This, OtherFunction)) { +// return Default; +// } - auto OtherFacts = OtherFunction.computeTarget(0); - OtherFacts.unionWith(This->Facts); +// auto OtherFacts = OtherFunction.computeTarget(0); +// OtherFacts.unionWith(This->Facts); - if (OtherFacts.Taints.isSmall()) { - uintptr_t Buf{}; - std::ignore = OtherFacts.Taints.getData(Buf); +// if (OtherFacts.Taints.isSmall()) { +// uintptr_t Buf{}; +// std::ignore = OtherFacts.Taints.getData(Buf); - return AddSmallFactsEF{Buf}; - } +// return AddSmallFactsEF{Buf}; +// } - // TODO: Caching +// // TODO: Caching - return AddFactsEF{std::move(OtherFacts)}; -} +// return AddFactsEF{std::move(OtherFacts)}; +// } -auto GenerateSmallEF::join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) - -> EdgeFunction { - return joinWithGen(This, OtherFunction); -} -auto GenerateEF::join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) - -> EdgeFunction { - return joinWithGen(This, OtherFunction); -} +// auto GenerateSmallEF::join(EdgeFunctionRef This, +// const EdgeFunction &OtherFunction) +// -> EdgeFunction { +// return joinWithGen(This, OtherFunction); +// } +// auto GenerateEF::join(EdgeFunctionRef This, +// const EdgeFunction &OtherFunction) +// -> EdgeFunction { +// return joinWithGen(This, OtherFunction); +// } -auto AddSmallFactsEF::join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) - -> EdgeFunction { - return joinWithAdd(This, OtherFunction); -} +// 
auto AddSmallFactsEF::join(EdgeFunctionRef This, +// const EdgeFunction &OtherFunction) +// -> EdgeFunction { +// return joinWithAdd(This, OtherFunction); +// } -auto AddFactsEF::join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) - -> EdgeFunction { - return joinWithAdd(This, OtherFunction); -} +// auto AddFactsEF::join(EdgeFunctionRef This, +// const EdgeFunction &OtherFunction) +// -> EdgeFunction { +// return joinWithAdd(This, OtherFunction); +// } /// -EdgeFunction genEF(l_t &&Facts) { +template +EdgeFunction genEF(l_t &&Facts, CacheT &GenEFCache) { if (Facts.Taints.isSmall()) { uintptr_t Buf{}; std::ignore = Facts.Taints.getData(Buf); return GenerateSmallEF{Buf}; } - return GenerateEF{std::move(Facts)}; + return GenEFCache.createEdgeFunction(std::move(Facts)); +} + +template +EdgeFunction addEF(l_t &&Facts, CacheT &AddEFCache) { + if (Facts.Taints.isSmall()) { + uintptr_t Buf{}; + std::ignore = Facts.Taints.getData(Buf); + return AddSmallFactsEF{Buf}; + } + return AddEFCache.createEdgeFunction(std::move(Facts)); } } // namespace //////////////////////////////////////////////////////////////////////////////// +EdgeFunction +IDEFeatureTaintAnalysis::extend(const EdgeFunction &FirstEF, + const EdgeFunction &SecondEF) { + auto Ret = [&] { + if (auto Default = defaultComposeOrNull(FirstEF, SecondEF)) { + llvm::errs() << "defaultComposeOrNull>>\n"; + return Default; + } + + auto Val = SecondEF.computeTarget(FirstEF.computeTarget(0)); + + if (FirstEF.isConstant()) { + return genEF(std::move(Val), GenEFCache); + } + + return addEF(std::move(Val), AddEFCache); + }(); + + llvm::errs() << "Extend " << FirstEF << " with " << SecondEF << " --> " << Ret + << '\n'; + + return Ret; +} +EdgeFunction +IDEFeatureTaintAnalysis::combine(const EdgeFunction &FirstEF, + const EdgeFunction &OtherEF) { + auto Ret = [&] { + /// XXX: Here, we underapproximate joins with EdgeIdentity + if (llvm::isa>(FirstEF)) { + return OtherEF; + } + if (llvm::isa>(OtherEF) && + 
!llvm::isa>(FirstEF)) { + return FirstEF; + } + + if (auto Default = defaultJoinOrNull(FirstEF, OtherEF)) { + return Default; + } + + auto ThisFacts = FirstEF.computeTarget(0); + ThisFacts.unionWith(OtherEF.computeTarget(0)); + + if (FirstEF.isConstant() && OtherEF.isConstant()) { + return genEF(std::move(ThisFacts), GenEFCache); + } + + return addEF(std::move(ThisFacts), AddEFCache); + }(); + + llvm::errs() << "Combine " << FirstEF << " and " << OtherEF << " --> " << Ret + << '\n'; + return Ret; +} + auto IDEFeatureTaintAnalysis::getNormalEdgeFunction(n_t Curr, d_t CurrNode, n_t /* Succ */, d_t SuccNode) @@ -458,12 +574,14 @@ auto IDEFeatureTaintAnalysis::getNormalEdgeFunction(n_t Curr, d_t CurrNode, if (isZeroValue(SuccNode) || CurrNode == SuccNode) { // We don't want to propagate any facts on zero + llvm::errs() << "Identity Edge\n"; return EdgeIdentity{}; } if (isZeroValue(CurrNode)) { + llvm::errs() << "Generate from Zero\n"; // Generate user edge-facts from zero - return genEF(TaintGen.getGeneratedTaintsAt(Curr)); + return genEF(TaintGen.getGeneratedTaintsAt(Curr), GenEFCache); } // Overrides at store instructions @@ -472,10 +590,13 @@ auto IDEFeatureTaintAnalysis::getNormalEdgeFunction(n_t Curr, d_t CurrNode, // Store tainted value // propagate facts unchanged. User edge-facts are generated from zero. + + llvm::errs() << "Store Identity\n"; return EdgeIdentity{}; } } + llvm::errs() << "Fallback Identity\n"; // Otherwise stick to identity. 
return EdgeIdentity{}; } @@ -486,7 +607,7 @@ auto IDEFeatureTaintAnalysis::getCallEdgeFunction(n_t CallSite, d_t SrcNode, -> EdgeFunction { if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { // Generate user edge-facts from zero - return genEF(TaintGen.getGeneratedTaintsAt(CallSite)); + return genEF(TaintGen.getGeneratedTaintsAt(CallSite), GenEFCache); } return EdgeIdentity{}; @@ -497,7 +618,7 @@ auto IDEFeatureTaintAnalysis::getReturnEdgeFunction( n_t /*RetSite*/, d_t RetNode) -> EdgeFunction { if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { // Generate user edge-facts from zero - return genEF(TaintGen.getGeneratedTaintsAt(CallSite)); + return genEF(TaintGen.getGeneratedTaintsAt(CallSite), GenEFCache); } return EdgeIdentity{}; @@ -508,7 +629,7 @@ auto IDEFeatureTaintAnalysis::getCallToRetEdgeFunction( llvm::ArrayRef /*Callees*/) -> EdgeFunction { if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { // Generate user edge-facts from zero - return genEF(TaintGen.getGeneratedTaintsAt(CallSite)); + return genEF(TaintGen.getGeneratedTaintsAt(CallSite), GenEFCache); } return EdgeIdentity{}; } @@ -527,7 +648,7 @@ auto IDEFeatureTaintAnalysis::initialSeeds() -> InitialSeeds { // parameters will otherwise cause trouble by overriding alloca // instructions without being valid data-flow facts themselves. - /// TODO: Do we want that? + /// TODO: Do we want that? 
--NO // for (const auto &Arg : SP->getFunction()->args()) { // Seeds.addSeed(SP, &Arg, BitVectorSet()); // } @@ -543,6 +664,8 @@ auto IDEFeatureTaintAnalysis::initialSeeds() -> InitialSeeds { } }); + Seeds.dump(llvm::errs()); + return Seeds; } diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt index 708ce3bcbc..7d5a903495 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt @@ -8,6 +8,7 @@ set(IfdsIdeProblemSources IDEGeneralizedLCATest.cpp IDEExtendedTaintAnalysisTest.cpp IDETSAnalysisFileIOTest.cpp + IDEFeatureTaintAnalysisTest.cpp ) if(PHASAR_BUILD_OPENSSL_TS_UNITTESTS) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp new file mode 100644 index 0000000000..9d2d075129 --- /dev/null +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -0,0 +1,411 @@ +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h" + +#include "phasar/Config/Configuration.h" +#include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/HelperAnalyses.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/BitVectorSet.h" +#include "phasar/Utils/Logger.h" + +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/Twine.h" + +#include "TestConfig.h" +#include "gtest/gtest.h" + +namespace { + +using namespace psr; + +static std::string printSet(const std::set &EdgeFact) { + std::string Ret; + llvm::raw_string_ostream ROS(Ret); + llvm::interleaveComma(EdgeFact, ROS << '<'); + 
ROS << '>'; + return Ret; +} + +/* ============== TEST FIXTURE ============== */ +class IDEInstInteractionAnalysisTest : public ::testing::Test { +protected: + static constexpr auto PathToLlFiles = + PHASAR_BUILD_SUBFOLDER("inst_interaction/"); + + // Function - Line Nr - Variable - Values + using IIACompactResult_t = + std::tuple>; + + std::optional HA; + LLVMProjectIRDB *IRDB{}; + + void initializeIR(const llvm::Twine &LlvmFilePath, + const std::vector &EntryPoints = {"main"}) { + HA.emplace(PathToLlFiles + LlvmFilePath, EntryPoints, + HelperAnalysisConfig{}.withCGType(CallGraphAnalysisType::CHA)); + IRDB = &HA->getProjectIRDB(); + } + + void + doAnalysisAndCompareResults(const std::string &LlvmFilePath, + const std::vector &EntryPoints, + const std::set &GroundTruth, + bool PrintDump = false) { + initializeIR(LlvmFilePath, EntryPoints); + + // IDEInstInteractionAnalysisT IIAProblem(IRDB, &ICFG, + // &PT, + // EntryPoints); + + // use Phasar's instruction ids as testing labels + auto Generator = + [](std::variant + Current) -> std::set { + return std::visit( + [](const auto *InstOrGlob) -> std::set { + std::set Labels; + if (InstOrGlob->hasMetadata()) { + std::string Label = + llvm::cast( + InstOrGlob->getMetadata(PhasarConfig::MetaDataKind()) + ->getOperand(0)) + ->getString() + .str(); + Labels.insert(Label); + } + return Labels; + }, + Current); + }; + assert(HA); + auto IIAProblem = createAnalysisProblem( + *HA, EntryPoints, Generator); + + // if (PrintDump) { + // psr::Logger::initializeStderrLogger(SeverityLevel::DEBUG); + // } + + IDESolver IIASolver(IIAProblem, &HA->getICFG()); + IIASolver.solve(); + if (PrintDump) { + IRDB->emitPreprocessedIR(llvm::outs()); + IIASolver.dumpResults(); + } + // do the comparison + for (const auto &[FunName, SrcLine, VarName, LatticeVal] : GroundTruth) { + const auto *Fun = IRDB->getFunctionDefinition(FunName); + const auto *IRLine = getNthInstruction(Fun, SrcLine); + auto ResultMap = IIASolver.resultsAt(IRLine); + 
assert(IRLine && "Could not retrieve IR line!"); + bool FactFound = false; + for (auto &[Fact, Value] : ResultMap) { + std::string FactStr; + llvm::raw_string_ostream RSO(FactStr); + RSO << *Fact; + llvm::StringRef FactRef(FactStr); + if (FactRef.ltrim().startswith("%" + VarName + " ") || + FactRef.ltrim().startswith("@" + VarName + " ")) { + PHASAR_LOG_LEVEL(DFADEBUG, "Checking variable: " << FactStr); + EXPECT_EQ(LatticeVal, Value.toSet()) + << "Value do not match for Variable '" << VarName + << "': Expected " << printSet(LatticeVal) + << "; got: " << LToString(Value.toBVSet()); + FactFound = true; + } + } + + EXPECT_TRUE(FactFound) << "Variable '" << VarName << "' missing at '" + << llvmIRToString(IRLine) << "'."; + } + } + + void TearDown() override {} + +}; // Test Fixture + +TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_01) { + std::set GroundTruth; + GroundTruth.emplace("main", 9, "i", std::set{"4"}); + GroundTruth.emplace("main", 9, "j", + std::set{"4", "5", "6", "7"}); + GroundTruth.emplace("main", 9, "retval", std::set{"3"}); + doAnalysisAndCompareResults("basic_01_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_02) { + std::set GroundTruth; + GroundTruth.emplace("main", 24, "retval", std::set{"6"}); + GroundTruth.emplace("main", 24, "argc.addr", std::set{"7"}); + GroundTruth.emplace("main", 24, "argv.addr", std::set{"8"}); + GroundTruth.emplace("main", 24, "i", std::set{"16", "18"}); + GroundTruth.emplace("main", 24, "j", + std::set{"9", "10", "11", "12"}); + GroundTruth.emplace("main", 24, "k", + std::set{"21", "16", "18", "20"}); + doAnalysisAndCompareResults("basic_02_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_03) { + std::set GroundTruth; + GroundTruth.emplace("main", 20, "retval", std::set{"3"}); + GroundTruth.emplace("main", 20, "i", + std::set{"4", "10", "11", "12"}); + GroundTruth.emplace("main", 20, "x", + std::set{"5", "14", 
"15", "16"}); + doAnalysisAndCompareResults("basic_03_cpp.ll", {"main"}, GroundTruth, false); +} + +PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_04) { + // If we use libcxx this won't work since internal implementation is different + LIBCPP_GTEST_SKIP; + + std::set GroundTruth; + GroundTruth.emplace("main", 23, "retval", std::set{"13"}); + GroundTruth.emplace("main", 23, "argc.addr", std::set{"14"}); + GroundTruth.emplace("main", 23, "argv.addr", std::set{"15"}); + GroundTruth.emplace("main", 23, "i", std::set{"16"}); + GroundTruth.emplace("main", 23, "j", + std::set{"16", "17", "19", "18"}); + GroundTruth.emplace( + "main", 23, "k", + std::set{"16", "17", "18", "19", "20", "24", "25"}); + doAnalysisAndCompareResults("basic_04_cpp.ll", {"main"}, GroundTruth, false); +}) + +TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_05) { + std::set GroundTruth; + GroundTruth.emplace("main", 11, "i", std::set{"5", "7"}); + GroundTruth.emplace("main", 11, "retval", std::set{"2"}); + doAnalysisAndCompareResults("basic_05_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_06) { + std::set GroundTruth; + GroundTruth.emplace("main", 19, "retval", std::set{"5"}); + GroundTruth.emplace("main", 19, "i", std::set{"15", "6", "13"}); + GroundTruth.emplace("main", 19, "j", std::set{"15", "6", "13"}); + GroundTruth.emplace("main", 19, "k", std::set{"6"}); + GroundTruth.emplace("main", 19, "p", + std::set{"1", "2", "9", "11"}); + doAnalysisAndCompareResults("basic_06_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_07) { + std::set GroundTruth; + GroundTruth.emplace("main", 15, "retval", std::set{"5"}); + GroundTruth.emplace("main", 15, "argc.addr", std::set{"6"}); + GroundTruth.emplace("main", 15, "argv.addr", std::set{"7"}); + GroundTruth.emplace("main", 15, "i", std::set{"12"}); + GroundTruth.emplace("main", 15, "j", + std::set{"8", "9", "10", "11"}); + 
doAnalysisAndCompareResults("basic_07_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_08) { + std::set GroundTruth; + GroundTruth.emplace("main", 12, "retval", std::set{"2"}); + GroundTruth.emplace("main", 12, "i", std::set{"9"}); + doAnalysisAndCompareResults("basic_08_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_09) { + std::set GroundTruth; + GroundTruth.emplace("main", 10, "i", std::set{"4"}); + GroundTruth.emplace("main", 10, "j", std::set{"4", "6", "7"}); + GroundTruth.emplace("main", 10, "retval", std::set{"3"}); + doAnalysisAndCompareResults("basic_09_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_10) { + std::set GroundTruth; + GroundTruth.emplace("main", 6, "i", std::set{"3"}); + GroundTruth.emplace("main", 6, "retval", std::set{"2"}); + doAnalysisAndCompareResults("basic_10_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_11) { + std::set GroundTruth; + GroundTruth.emplace("main", 20, "FeatureSelector", + std::set{"5", "7", "8"}); + GroundTruth.emplace("main", 20, "retval", std::set{"11", "16"}); + doAnalysisAndCompareResults("basic_11_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_01) { + std::set GroundTruth; + GroundTruth.emplace("main", 14, "retval", std::set{"8"}); + GroundTruth.emplace("main", 14, "i", std::set{"9"}); + GroundTruth.emplace("main", 14, "j", + std::set{"12", "9", "10", "11"}); + GroundTruth.emplace( + "main", 14, "k", + std::set{"15", "1", "2", "13", "12", "9", "10", "11"}); + doAnalysisAndCompareResults("call_01_cpp.ll", {"main"}, GroundTruth, true); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_02) { + std::set GroundTruth; + GroundTruth.emplace("main", 13, "retval", std::set{"12"}); + GroundTruth.emplace("main", 13, "i", std::set{"13"}); + 
GroundTruth.emplace("main", 13, "j", std::set{"14"}); + GroundTruth.emplace("main", 13, "k", + std::set{"4", "5", "15", "6", "3", "14", "2", + "13", "16", "18"}); + doAnalysisAndCompareResults("call_02_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_03) { + std::set GroundTruth; + GroundTruth.emplace("main", 10, "retval", std::set{"20"}); + GroundTruth.emplace("main", 10, "i", std::set{"21"}); + GroundTruth.emplace("main", 10, "j", + std::set{"22", "15", "6", "21", "2", "13", + "8", "9", "12", "10", "24"}); + doAnalysisAndCompareResults("call_03_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_04) { + std::set GroundTruth; + GroundTruth.emplace("main", 20, "retval", std::set{"33"}); + GroundTruth.emplace("main", 20, "i", std::set{"34"}); + GroundTruth.emplace("main", 20, "j", + std::set{"15", "6", "2", "13", "8", "9", + "12", "10", "35", "34", "37"}); + GroundTruth.emplace("main", 20, "k", + std::set{ + "41", "19", "15", "6", "44", "2", "13", "8", "45", + "18", "9", "12", "10", "46", "24", "25", "35", "27", + "23", "26", "38", "34", "37", "42", "40"}); + doAnalysisAndCompareResults("call_04_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_05) { + std::set GroundTruth; + GroundTruth.emplace("main", 10, "retval", std::set{"8"}); + GroundTruth.emplace("main", 10, "i", std::set{"3", "11", "9"}); + GroundTruth.emplace("main", 10, "j", std::set{"3", "10", "12"}); + doAnalysisAndCompareResults("call_05_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_06) { + // NOTE: Here we are suffering from IntraProceduralAliasesOnly + std::set GroundTruth; + GroundTruth.emplace("main", 24, "retval", std::set{"11"}); + GroundTruth.emplace("main", 24, "i", + std::set{"3", "1", "2", "16", "18", "12"}); + GroundTruth.emplace("main", 24, "j", + std::set{"19", "21", "3", "1", "2", "13"}); 
+ GroundTruth.emplace("main", 24, "k", + std::set{"22", "3", "14", "1", "2", "24"}); + GroundTruth.emplace("main", 24, "l", + std::set{"15", "3", "1", "2", "25", "27"}); + doAnalysisAndCompareResults("call_06_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_07) { + std::set GroundTruth; + GroundTruth.emplace("main", 6, "retval", std::set{"7"}); + GroundTruth.emplace("main", 6, "VarIR", std::set{"6", "3", "8"}); + doAnalysisAndCompareResults("call_07_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_01) { + std::set GroundTruth; + GroundTruth.emplace("main", 9, "retval", std::set{"3"}); + GroundTruth.emplace("main", 9, "i", std::set{"7"}); + GroundTruth.emplace("main", 9, "j", std::set{"0", "5", "6"}); + doAnalysisAndCompareResults("global_01_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_02) { + std::set GroundTruth; + GroundTruth.emplace("_Z5initBv", 2, "a", std::set{"0"}); + GroundTruth.emplace("_Z5initBv", 2, "b", std::set{"2"}); + GroundTruth.emplace("main", 12, "a", std::set{"0"}); + GroundTruth.emplace("main", 12, "b", std::set{"2"}); + GroundTruth.emplace("main", 12, "retval", std::set{"6"}); + GroundTruth.emplace("main", 12, "c", std::set{"1", "8", "7"}); + doAnalysisAndCompareResults("global_02_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_03) { + std::set GroundTruth; + GroundTruth.emplace("main", 1, "GlobalFeature", std::set{"0"}); + GroundTruth.emplace("main", 2, "GlobalFeature", std::set{"0"}); + GroundTruth.emplace("main", 17, "GlobalFeature", std::set{"0"}); + doAnalysisAndCompareResults("global_03_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_04) { + std::set GroundTruth; + GroundTruth.emplace("main", 1, "GlobalFeature", std::set{"0"}); + GroundTruth.emplace("main", 2, 
"GlobalFeature", std::set{"0"}); + GroundTruth.emplace("main", 17, "GlobalFeature", std::set{"0"}); + GroundTruth.emplace("_Z7doStuffi", 1, "GlobalFeature", + std::set{"0"}); + GroundTruth.emplace("_Z7doStuffi", 2, "GlobalFeature", + std::set{"0"}); + doAnalysisAndCompareResults("global_04_cpp.ll", {"main", "_Z7doStuffi"}, + GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, KillTest_01) { + std::set GroundTruth; + GroundTruth.emplace("main", 12, "retval", std::set{"4"}); + GroundTruth.emplace("main", 12, "i", std::set{"5"}); + GroundTruth.emplace("main", 12, "j", std::set{"10"}); + GroundTruth.emplace("main", 12, "k", std::set{"9", "8", "5"}); + doAnalysisAndCompareResults("KillTest_01_cpp.ll", {"main"}, GroundTruth, + false); +} + +TEST_F(IDEInstInteractionAnalysisTest, KillTest_02) { + std::set GroundTruth; + GroundTruth.emplace("main", 12, "retval", std::set{"6"}); + GroundTruth.emplace("main", 12, "A", std::set{"0"}); + GroundTruth.emplace("main", 12, "B", std::set{"2"}); + GroundTruth.emplace("main", 12, "C", std::set{"1", "7", "8"}); + doAnalysisAndCompareResults("KillTest_02_cpp.ll", {"main"}, GroundTruth, + false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleReturnTest_01) { + std::set GroundTruth; + GroundTruth.emplace("main", 6, "retval", std::set{"3"}); + GroundTruth.emplace("main", 6, "localVar", std::set{"4"}); + GroundTruth.emplace("main", 6, "call", std::set{"0"}); + GroundTruth.emplace("main", 8, "localVar", std::set{"0", "6"}); + GroundTruth.emplace("main", 8, "call", std::set{"0"}); + doAnalysisAndCompareResults("return_01_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleHeapTest_01) { + std::set GroundTruth; + GroundTruth.emplace("main", 19, "retval", std::set{"3"}); + GroundTruth.emplace("main", 19, "i", std::set{"6", "7"}); + GroundTruth.emplace("main", 19, "j", + std::set{"6", "7", "8", "10", "9"}); + doAnalysisAndCompareResults("heap_01_cpp.ll", {"main"}, GroundTruth, false); +} 
+ +PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_01) { + GTEST_SKIP() << "This test heavily depends on the used stdlib version. TODO: " + "add a better one"; + + std::set GroundTruth; + GroundTruth.emplace("main", 16, "retval", std::set{"75", "76"}); + GroundTruth.emplace("main", 16, "str", + std::set{"70", "65", "72", "74", "77"}); + GroundTruth.emplace("main", 16, "ref.tmp", + std::set{"66", "9", "72", "73", "71"}); + doAnalysisAndCompareResults("rvo_01_cpp.ll", {"main"}, GroundTruth, false); +}) +} // namespace + +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); +} From 4f3ba96a73e41cda937944a7a1fe4fab2b40b7d1 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 23 Apr 2024 09:17:10 +0200 Subject: [PATCH 09/33] Fix IIA --- include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h | 3 ++- .../DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h b/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h index 7c8a7ebcba..bfbc9d2333 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h @@ -139,7 +139,8 @@ defaultComposeOrNull(const EdgeFunction &This, if (llvm::isa>(SecondFunction)) { return This; } - if (SecondFunction.isConstant() || llvm::isa>(This)) { + if (SecondFunction.isConstant() || llvm::isa>(This) || + llvm::isa>(This)) { return SecondFunction; } if (llvm::isa>(This)) { diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h index 4c414bfbd9..b4410d1f99 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEInstInteractionAnalysis.h @@ -1037,7 +1037,8 @@ class 
IDEInstInteractionAnalysisT return KillOrReplace->Replacement; } llvm::report_fatal_error( - "found unexpected first edge function in 'getData'"); + "found unexpected first edge function in 'getData': " + + llvm::Twine(to_string(EF))); } EdgeFunction extend(const EdgeFunction &FirstFunction, From 9d5de40a06859bfbf0d83966abdbbdef56acdc92 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 23 Apr 2024 09:42:24 +0200 Subject: [PATCH 10/33] Fix call handling (TODO: Should we strong update pointer args?) --- .../Problems/IDEFeatureTaintAnalysis.cpp | 40 ++++++++++++++----- .../Problems/IDEFeatureTaintAnalysisTest.cpp | 5 +++ 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index cd5091abf4..6b44e008a6 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -139,7 +139,7 @@ auto IDEFeatureTaintAnalysis::getCallFlowFunction(n_t CallSite, f_t DestFun) const auto *CS = llvm::cast(CallSite); // Map actual to formal parameters. - return mapFactsToCallee( + auto MapFactsToCalleeFF = mapFactsToCallee( CS, DestFun, [CS](const llvm::Value *ActualArg, ByConstRef Src) { if (d_t(ActualArg) != Src) { return false; @@ -151,6 +151,18 @@ auto IDEFeatureTaintAnalysis::getCallFlowFunction(n_t CallSite, f_t DestFun) return true; }); + + // Generate the artificially introduced RVO parameters from zero value. + const auto *SRetFormal = + CS->hasStructRetAttr() ? 
DestFun->getArg(0) : nullptr; + + if (SRetFormal && TaintGen.isSource(CallSite)) { + return unionFlows( + std::move(MapFactsToCalleeFF), + generateFlowAndKillAllOthers(SRetFormal, this->getZeroValue())); + } + + return MapFactsToCalleeFF; } auto IDEFeatureTaintAnalysis::getRetFlowFunction(n_t CallSite, @@ -164,7 +176,10 @@ auto IDEFeatureTaintAnalysis::getRetFlowFunction(n_t CallSite, return killAllFlows(); } - bool GeneratesFact = TaintGen.isSource(ExitInst); + const auto *RetInst = llvm::dyn_cast(ExitInst); + auto *RetVal = RetInst ? RetInst->getReturnValue() : nullptr; + bool GeneratesFact = llvm::isa_and_nonnull(RetVal) && + TaintGen.isSource(ExitInst); return mapFactsToCaller( llvm::cast(CallSite), ExitInst, {}, [GeneratesFact](const llvm::Value *RetVal, d_t Src) { @@ -185,10 +200,9 @@ auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( n_t CallSite, n_t /* RetSite */, llvm::ArrayRef Callees) -> FlowFunctionPtrType { - bool GeneratesFact = - !CallSite->getType()->isVoidTy() && TaintGen.isSource(CallSite); - if (llvm::all_of(Callees, [](f_t Fun) { return Fun->isDeclaration(); })) { + bool GeneratesFact = + !CallSite->getType()->isVoidTy() && TaintGen.isSource(CallSite); if (GeneratesFact) { return generateFromZero(CallSite); } @@ -203,10 +217,11 @@ auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( }, /*PropagateGlobals*/ false); - if (GeneratesFact) { - return unionFlows(std::move(Mapper), - generateFlowAndKillAllOthers(CallSite, getZeroValue())); - } + // if (GeneratesFact) { + // return unionFlows(std::move(Mapper), + // generateFlowAndKillAllOthers(CallSite, + // getZeroValue())); + // } return Mapper; } @@ -614,11 +629,11 @@ auto IDEFeatureTaintAnalysis::getCallEdgeFunction(n_t CallSite, d_t SrcNode, } auto IDEFeatureTaintAnalysis::getReturnEdgeFunction( - n_t CallSite, f_t /*CalleeFunction*/, n_t /*ExitStmt*/, d_t ExitNode, + n_t CallSite, f_t /*CalleeFunction*/, n_t ExitStmt, d_t ExitNode, n_t /*RetSite*/, d_t RetNode) -> EdgeFunction { if 
(isZeroValue(ExitNode) && !isZeroValue(RetNode)) { // Generate user edge-facts from zero - return genEF(TaintGen.getGeneratedTaintsAt(CallSite), GenEFCache); + return genEF(TaintGen.getGeneratedTaintsAt(ExitStmt), GenEFCache); } return EdgeIdentity{}; @@ -629,6 +644,9 @@ auto IDEFeatureTaintAnalysis::getCallToRetEdgeFunction( llvm::ArrayRef /*Callees*/) -> EdgeFunction { if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { // Generate user edge-facts from zero + + llvm::errs() << "At CTR " << llvmIRToString(CallSite) + << ": Gen from zero!\n"; return genEF(TaintGen.getGeneratedTaintsAt(CallSite), GenEFCache); } return EdgeIdentity{}; diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp index 9d2d075129..60899f7f03 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -47,6 +47,11 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { HA.emplace(PathToLlFiles + LlvmFilePath, EntryPoints, HelperAnalysisConfig{}.withCGType(CallGraphAnalysisType::CHA)); IRDB = &HA->getProjectIRDB(); + + for (const auto *Inst : IRDB->getAllInstructions()) { + BitVectorSet BV; + BV.insert(getMetaDataID(Inst)); + } } void From 0adaae82706d8d57387d16c8ff05acc4b22ffba3 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 23 Apr 2024 09:55:42 +0200 Subject: [PATCH 11/33] Rename FeatureTaintAnalysis to FeatureInteractionAnalysis --- ...ysis.h => IDEFeatureInteractionAnalysis.h} | 37 ++++--- ....cpp => IDEFeatureInteractionAnalysis.cpp} | 101 +++++++++--------- .../DataFlow/IfdsIde/Problems/CMakeLists.txt | 2 +- ... 
=> IDEFeatureInteractionAnalysisTest.cpp} | 5 +- 4 files changed, 77 insertions(+), 68 deletions(-) rename include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/{IDEFeatureTaintAnalysis.h => IDEFeatureInteractionAnalysis.h} (90%) rename lib/PhasarLLVM/DataFlow/IfdsIde/Problems/{IDEFeatureTaintAnalysis.cpp => IDEFeatureInteractionAnalysis.cpp} (87%) rename unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/{IDEFeatureTaintAnalysisTest.cpp => IDEFeatureInteractionAnalysisTest.cpp} (99%) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.h similarity index 90% rename from include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h rename to include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.h index d2cd372846..839e414e63 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.h @@ -162,7 +162,7 @@ template <> struct JoinLatticeTraits { } }; -struct IDEFeatureTaintAnalysisDomain : LLVMAnalysisDomainDefault { +struct IDEFeatureInteractionAnalysisDomain : LLVMAnalysisDomainDefault { using l_t = IDEFeatureTaintEdgeFact; }; @@ -186,7 +186,7 @@ class FeatureTaintGenerator { auto Ret = IDEFeatureTaintEdgeFact{std::move(BV).getBits()}; - llvm::errs() << "generateTaints: " << LToString(Ret) << '\n'; + // llvm::errs() << "generateTaints: " << LToString(Ret) << '\n'; return Ret; }; } @@ -246,34 +246,37 @@ class FeatureTaintGenerator { PrinterFn Printer; }; -class IDEFeatureTaintAnalysis - : public IDETabulationProblem { +class IDEFeatureInteractionAnalysis + : public IDETabulationProblem { public: - IDEFeatureTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, - std::vector EntryPoints, - FeatureTaintGenerator &&TaintGen); + IDEFeatureInteractionAnalysis(const LLVMProjectIRDB *IRDB, 
+ LLVMAliasInfoRef PT, + std::vector EntryPoints, + FeatureTaintGenerator &&TaintGen); template - IDEFeatureTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, - std::vector EntryPoints, - EdgeFactGenerator &&EFGen) - : IDEFeatureTaintAnalysis( + IDEFeatureInteractionAnalysis(const LLVMProjectIRDB *IRDB, + LLVMAliasInfoRef PT, + std::vector EntryPoints, + EdgeFactGenerator &&EFGen) + : IDEFeatureInteractionAnalysis( IRDB, PT, std::move(EntryPoints), FeatureTaintGenerator(std::forward(EFGen))) {} template - IDEFeatureTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, - std::vector EntryPoints, - SourceDetector &&SrcDetector, - EdgeFactGenerator &&EFGen) - : IDEFeatureTaintAnalysis( + IDEFeatureInteractionAnalysis(const LLVMProjectIRDB *IRDB, + LLVMAliasInfoRef PT, + std::vector EntryPoints, + SourceDetector &&SrcDetector, + EdgeFactGenerator &&EFGen) + : IDEFeatureInteractionAnalysis( IRDB, PT, std::move(EntryPoints), FeatureTaintGenerator(std::forward(SrcDetector), std::forward(EFGen))) {} // The EF Caches are incomplete, so move the dtor into the .cpp - ~IDEFeatureTaintAnalysis(); + ~IDEFeatureInteractionAnalysis(); ////////////////////////////////////////////////////////////////////////////// /// Flow Functions diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp similarity index 87% rename from lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp rename to lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp index 6b44e008a6..8e967f367c 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp @@ -1,4 +1,4 @@ -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.h" #include 
"phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" @@ -27,17 +27,17 @@ using namespace psr; -using l_t = IDEFeatureTaintAnalysisDomain::l_t; -using d_t = IDEFeatureTaintAnalysisDomain::d_t; +using l_t = IDEFeatureInteractionAnalysisDomain::l_t; +using d_t = IDEFeatureInteractionAnalysisDomain::d_t; -IDEFeatureTaintAnalysis::IDEFeatureTaintAnalysis( +IDEFeatureInteractionAnalysis::IDEFeatureInteractionAnalysis( const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, std::vector EntryPoints, FeatureTaintGenerator &&TaintGen) - : IDETabulationProblem( + : IDETabulationProblem( IRDB, std::move(EntryPoints), LLVMZeroValue::getInstance()), TaintGen(std::move(TaintGen)), PT(PT) {} -IDEFeatureTaintAnalysis::~IDEFeatureTaintAnalysis() = default; +IDEFeatureInteractionAnalysis::~IDEFeatureInteractionAnalysis() = default; std::string psr::LToString(const IDEFeatureTaintEdgeFact &EdgeFact) { std::string Ret; @@ -48,11 +48,13 @@ std::string psr::LToString(const IDEFeatureTaintEdgeFact &EdgeFact) { return Ret; } -auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) +auto IDEFeatureInteractionAnalysis::getNormalFlowFunction(n_t Curr, + n_t /* Succ */) -> FlowFunctionPtrType { bool GeneratesFact = TaintGen.isSource(Curr); - llvm::errs() << "[getNormalFlowFunction]: " << llvmIRToString(Curr) << '\n'; + // llvm::errs() << "[getNormalFlowFunction]: " << llvmIRToString(Curr) << + // '\n'; if (const auto *Alloca = llvm::dyn_cast(Curr)) { if (GeneratesFact) { @@ -128,7 +130,8 @@ auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) }); } -auto IDEFeatureTaintAnalysis::getCallFlowFunction(n_t CallSite, f_t DestFun) +auto IDEFeatureInteractionAnalysis::getCallFlowFunction(n_t CallSite, + f_t DestFun) -> FlowFunctionPtrType { if (DestFun->isDeclaration()) { @@ -165,10 +168,10 @@ auto IDEFeatureTaintAnalysis::getCallFlowFunction(n_t CallSite, f_t DestFun) return MapFactsToCalleeFF; } -auto 
IDEFeatureTaintAnalysis::getRetFlowFunction(n_t CallSite, - f_t /*CalleeFun*/, - n_t ExitInst, - n_t /* RetSite */) +auto IDEFeatureInteractionAnalysis::getRetFlowFunction(n_t CallSite, + f_t /*CalleeFun*/, + n_t ExitInst, + n_t /* RetSite */) -> FlowFunctionPtrType { // Map return value back to the caller. If pointer parameters hold at the // end of a callee function generate all of those in the caller context. @@ -196,7 +199,7 @@ auto IDEFeatureTaintAnalysis::getRetFlowFunction(n_t CallSite, }); } -auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( +auto IDEFeatureInteractionAnalysis::getCallToRetFlowFunction( n_t CallSite, n_t /* RetSite */, llvm::ArrayRef Callees) -> FlowFunctionPtrType { @@ -225,8 +228,8 @@ auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( return Mapper; } -struct IDEFeatureTaintAnalysis::AddFactsEF { - using l_t = IDEFeatureTaintAnalysisDomain::l_t; +struct IDEFeatureInteractionAnalysis::AddFactsEF { + using l_t = IDEFeatureInteractionAnalysisDomain::l_t; IDEFeatureTaintEdgeFact Facts; @@ -255,8 +258,8 @@ struct IDEFeatureTaintAnalysis::AddFactsEF { } }; -struct IDEFeatureTaintAnalysis::GenerateEF { - using l_t = IDEFeatureTaintAnalysisDomain::l_t; +struct IDEFeatureInteractionAnalysis::GenerateEF { + using l_t = IDEFeatureInteractionAnalysisDomain::l_t; IDEFeatureTaintEdgeFact Facts; @@ -288,7 +291,7 @@ struct IDEFeatureTaintAnalysis::GenerateEF { namespace { struct AddSmallFactsEF { - using l_t = IDEFeatureTaintAnalysisDomain::l_t; + using l_t = IDEFeatureInteractionAnalysisDomain::l_t; uintptr_t Facts{}; @@ -318,7 +321,7 @@ struct AddSmallFactsEF { }; struct GenerateSmallEF { - using l_t = IDEFeatureTaintAnalysisDomain::l_t; + using l_t = IDEFeatureInteractionAnalysisDomain::l_t; uintptr_t Facts{}; @@ -528,11 +531,11 @@ EdgeFunction addEF(l_t &&Facts, CacheT &AddEFCache) { //////////////////////////////////////////////////////////////////////////////// EdgeFunction -IDEFeatureTaintAnalysis::extend(const EdgeFunction &FirstEF, - 
const EdgeFunction &SecondEF) { +IDEFeatureInteractionAnalysis::extend(const EdgeFunction &FirstEF, + const EdgeFunction &SecondEF) { auto Ret = [&] { if (auto Default = defaultComposeOrNull(FirstEF, SecondEF)) { - llvm::errs() << "defaultComposeOrNull>>\n"; + // llvm::errs() << "defaultComposeOrNull>>\n"; return Default; } @@ -545,14 +548,15 @@ IDEFeatureTaintAnalysis::extend(const EdgeFunction &FirstEF, return addEF(std::move(Val), AddEFCache); }(); - llvm::errs() << "Extend " << FirstEF << " with " << SecondEF << " --> " << Ret - << '\n'; + // llvm::errs() << "Extend " << FirstEF << " with " << SecondEF << " --> " << + // Ret + // << '\n'; return Ret; } EdgeFunction -IDEFeatureTaintAnalysis::combine(const EdgeFunction &FirstEF, - const EdgeFunction &OtherEF) { +IDEFeatureInteractionAnalysis::combine(const EdgeFunction &FirstEF, + const EdgeFunction &OtherEF) { auto Ret = [&] { /// XXX: Here, we underapproximate joins with EdgeIdentity if (llvm::isa>(FirstEF)) { @@ -577,24 +581,25 @@ IDEFeatureTaintAnalysis::combine(const EdgeFunction &FirstEF, return addEF(std::move(ThisFacts), AddEFCache); }(); - llvm::errs() << "Combine " << FirstEF << " and " << OtherEF << " --> " << Ret - << '\n'; + // llvm::errs() << "Combine " << FirstEF << " and " << OtherEF << " --> " << + // Ret + // << '\n'; return Ret; } -auto IDEFeatureTaintAnalysis::getNormalEdgeFunction(n_t Curr, d_t CurrNode, - n_t /* Succ */, - d_t SuccNode) - -> EdgeFunction { +auto IDEFeatureInteractionAnalysis::getNormalEdgeFunction( + n_t Curr, d_t CurrNode, n_t /* Succ */, d_t SuccNode) -> EdgeFunction { if (isZeroValue(SuccNode) || CurrNode == SuccNode) { // We don't want to propagate any facts on zero - llvm::errs() << "Identity Edge\n"; + + // llvm::errs() << "Identity Edge\n"; return EdgeIdentity{}; } if (isZeroValue(CurrNode)) { - llvm::errs() << "Generate from Zero\n"; + // llvm::errs() << "Generate from Zero\n"; + // Generate user edge-facts from zero return genEF(TaintGen.getGeneratedTaintsAt(Curr), 
GenEFCache); } @@ -606,19 +611,18 @@ auto IDEFeatureTaintAnalysis::getNormalEdgeFunction(n_t Curr, d_t CurrNode, // propagate facts unchanged. User edge-facts are generated from zero. - llvm::errs() << "Store Identity\n"; + // llvm::errs() << "Store Identity\n"; return EdgeIdentity{}; } } - llvm::errs() << "Fallback Identity\n"; + // llvm::errs() << "Fallback Identity\n"; // Otherwise stick to identity. return EdgeIdentity{}; } -auto IDEFeatureTaintAnalysis::getCallEdgeFunction(n_t CallSite, d_t SrcNode, - f_t /*DestinationFunction*/, - d_t DestNode) +auto IDEFeatureInteractionAnalysis::getCallEdgeFunction( + n_t CallSite, d_t SrcNode, f_t /*DestinationFunction*/, d_t DestNode) -> EdgeFunction { if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { // Generate user edge-facts from zero @@ -628,7 +632,7 @@ auto IDEFeatureTaintAnalysis::getCallEdgeFunction(n_t CallSite, d_t SrcNode, return EdgeIdentity{}; } -auto IDEFeatureTaintAnalysis::getReturnEdgeFunction( +auto IDEFeatureInteractionAnalysis::getReturnEdgeFunction( n_t CallSite, f_t /*CalleeFunction*/, n_t ExitStmt, d_t ExitNode, n_t /*RetSite*/, d_t RetNode) -> EdgeFunction { if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { @@ -639,20 +643,21 @@ auto IDEFeatureTaintAnalysis::getReturnEdgeFunction( return EdgeIdentity{}; } -auto IDEFeatureTaintAnalysis::getCallToRetEdgeFunction( +auto IDEFeatureInteractionAnalysis::getCallToRetEdgeFunction( n_t CallSite, d_t CallNode, n_t /*RetSite*/, d_t RetSiteNode, llvm::ArrayRef /*Callees*/) -> EdgeFunction { if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { // Generate user edge-facts from zero - llvm::errs() << "At CTR " << llvmIRToString(CallSite) - << ": Gen from zero!\n"; + // llvm::errs() << "At CTR " << llvmIRToString(CallSite) + // << ": Gen from zero!\n"; return genEF(TaintGen.getGeneratedTaintsAt(CallSite), GenEFCache); } return EdgeIdentity{}; } -auto IDEFeatureTaintAnalysis::initialSeeds() -> InitialSeeds { +auto 
IDEFeatureInteractionAnalysis::initialSeeds() + -> InitialSeeds { InitialSeeds Seeds; LLVMBasedCFG CFG; @@ -682,16 +687,16 @@ auto IDEFeatureTaintAnalysis::initialSeeds() -> InitialSeeds { } }); - Seeds.dump(llvm::errs()); + // Seeds.dump(llvm::errs()); return Seeds; } -bool IDEFeatureTaintAnalysis::isZeroValue(d_t FlowFact) const noexcept { +bool IDEFeatureInteractionAnalysis::isZeroValue(d_t FlowFact) const noexcept { return LLVMZeroValue::isLLVMZeroValue(FlowFact); } -void IDEFeatureTaintAnalysis::emitTextReport( +void IDEFeatureInteractionAnalysis::emitTextReport( const SolverResults &SR, llvm::raw_ostream &OS) { OS << "\n====================== IDE-Inst-Interaction-Analysis Report " "======================\n"; diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt index 7d5a903495..29738963c8 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt @@ -8,7 +8,7 @@ set(IfdsIdeProblemSources IDEGeneralizedLCATest.cpp IDEExtendedTaintAnalysisTest.cpp IDETSAnalysisFileIOTest.cpp - IDEFeatureTaintAnalysisTest.cpp + IDEFeatureInteractionAnalysisTest.cpp ) if(PHASAR_BUILD_OPENSSL_TS_UNITTESTS) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysisTest.cpp similarity index 99% rename from unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp rename to unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysisTest.cpp index 60899f7f03..5adc0642ef 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysisTest.cpp @@ -1,4 +1,4 @@ -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h" +#include 
"phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.h" #include "phasar/Config/Configuration.h" #include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" @@ -48,6 +48,7 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { HelperAnalysisConfig{}.withCGType(CallGraphAnalysisType::CHA)); IRDB = &HA->getProjectIRDB(); + // Initialze IDs for (const auto *Inst : IRDB->getAllInstructions()) { BitVectorSet BV; BV.insert(getMetaDataID(Inst)); @@ -86,7 +87,7 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { Current); }; assert(HA); - auto IIAProblem = createAnalysisProblem( + auto IIAProblem = createAnalysisProblem( *HA, EntryPoints, Generator); // if (PrintDump) { From b00766997714f034c05cc5641f14361178370e25 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 19 May 2024 12:00:15 +0200 Subject: [PATCH 12/33] Add taints for args at callsite --- .../phasar/DataFlow/IfdsIde/FlowFunctions.h | 8 ++-- .../IDEFeatureInteractionAnalysis.cpp | 43 +++++++++++++------ .../IDEFeatureInteractionAnalysisTest.cpp | 2 +- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h index ad41e1e10f..a7b3c41f57 100644 --- a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h @@ -499,13 +499,13 @@ template class FlowFunctionTemplates { struct GenManyAndKillAllOthers final : public FlowFunction { GenManyAndKillAllOthers(Container &&GenValues, d_t FromValue) - : GenValues(std::move(GenValues)), FromValue(std::move(FromValue)) {} + : GenValues(std::move(GenValues)), FromValue(FromValue) { + this->GenValues.insert(std::move(FromValue)); + } container_type computeTargets(d_t Source) override { if (Source == FromValue) { - auto Ret = GenValues; - Ret.insert(std::move(Source)); - return Ret; + return GenValues; } return {}; } diff --git 
a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp index 8e967f367c..930fd4969f 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp @@ -186,9 +186,6 @@ auto IDEFeatureInteractionAnalysis::getRetFlowFunction(n_t CallSite, return mapFactsToCaller( llvm::cast(CallSite), ExitInst, {}, [GeneratesFact](const llvm::Value *RetVal, d_t Src) { - // ASK: Should all parameters be mapped back? Or just pointer params? - // ASK: IDEInstInteractionAnalysisTest.HandleCallTest_01: Should 14 - // (%call) hold for 'k' at ret? if (Src == RetVal) { return true; } @@ -203,8 +200,9 @@ auto IDEFeatureInteractionAnalysis::getCallToRetFlowFunction( n_t CallSite, n_t /* RetSite */, llvm::ArrayRef Callees) -> FlowFunctionPtrType { + bool GeneratesFact = false; if (llvm::all_of(Callees, [](f_t Fun) { return Fun->isDeclaration(); })) { - bool GeneratesFact = + GeneratesFact = !CallSite->getType()->isVoidTy() && TaintGen.isSource(CallSite); if (GeneratesFact) { return generateFromZero(CallSite); @@ -212,19 +210,20 @@ auto IDEFeatureInteractionAnalysis::getCallToRetFlowFunction( return identityFlow(); } + const auto *Call = llvm::cast(CallSite); auto Mapper = mapFactsAlongsideCallSite( - llvm::cast(CallSite), + Call, [](d_t Arg) { - return !Arg->getType()->isPointerTy(); - // return llvm::isa(Arg); + return true; + // return !Arg->getType()->isPointerTy(); + // return llvm::isa(Arg); }, /*PropagateGlobals*/ false); - // if (GeneratesFact) { - // return unionFlows(std::move(Mapper), - // generateFlowAndKillAllOthers(CallSite, - // getZeroValue())); - // } + if (GeneratesFact) { + return unionFlows(std::move(Mapper), + generateFlowAndKillAllOthers(CallSite, getZeroValue())); + } return Mapper; } @@ -653,6 +652,26 @@ auto IDEFeatureInteractionAnalysis::getCallToRetEdgeFunction( // << 
": Gen from zero!\n"; return genEF(TaintGen.getGeneratedTaintsAt(CallSite), GenEFCache); } + + // Capture interactions of the call instruction and its arguments. + const auto *CS = llvm::dyn_cast(CallSite); + for (const auto &Arg : CS->args()) { + // + // o_i --> o_i + // + // Edge function: + // + // o_i + // | + // %i = call o_i | \ \x.x \cup { commit of('%i = call H') } + // v + // o_i + // + if (CallNode == Arg && CallNode == RetSiteNode) { + return addEF(TaintGen.getGeneratedTaintsAt(CallSite), AddEFCache); + } + } + return EdgeIdentity{}; } diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysisTest.cpp index 5adc0642ef..28656abc0c 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysisTest.cpp @@ -249,7 +249,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_01) { GroundTruth.emplace( "main", 14, "k", std::set{"15", "1", "2", "13", "12", "9", "10", "11"}); - doAnalysisAndCompareResults("call_01_cpp.ll", {"main"}, GroundTruth, true); + doAnalysisAndCompareResults("call_01_cpp.ll", {"main"}, GroundTruth, false); } TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_02) { From 4f73b800489529507fcad991e09f39d4f4bae18b Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 20 May 2024 13:39:04 +0200 Subject: [PATCH 13/33] Add FTaint Analysis to controller --- .../PhasarLLVM/Utils/DataFlowAnalysisType.def | 1 + lib/Controller/AnalysisController.cpp | 3 + lib/Controller/AnalysisControllerInternal.h | 2 + lib/Controller/AnalysisControllerXIDEFIIA.cpp | 44 ++++++++ .../IDEFeatureInteractionAnalysis.cpp | 101 +++++++++++++++--- 5 files changed, 134 insertions(+), 17 deletions(-) create mode 100644 lib/Controller/AnalysisControllerXIDEFIIA.cpp diff --git a/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def 
b/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def index 08cf9e9e08..532ba011d7 100644 --- a/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def +++ b/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def @@ -22,6 +22,7 @@ DATA_FLOW_ANALYSIS_TYPES(IFDSSolverTest, "ifds-solvertest", "Empty analysis. Jus DATA_FLOW_ANALYSIS_TYPES(IDELinearConstantAnalysis, "ide-lca", "Simple linear constant propagation") DATA_FLOW_ANALYSIS_TYPES(IDESolverTest, "ide-solvertest", "Empty analysis. Just to see that the IDE solver works") DATA_FLOW_ANALYSIS_TYPES(IDEInstInteractionAnalysis, "ide-iia", "Which instruction has influence on which other instructions?") +DATA_FLOW_ANALYSIS_TYPES(IDEFeatureTaintAnalysis, "ide-fiia", "Which instruction has influence on which other instructions?") DATA_FLOW_ANALYSIS_TYPES(IntraMonoFullConstantPropagation, "intra-mono-fca", "Simple constant propagation without the restriction to linear binary operations. Only works inTRA-procedurally") DATA_FLOW_ANALYSIS_TYPES(IntraMonoSolverTest, "intra-mono-solvertest", "Empty analysis. Just to see that the intraprocedural monotone solver works") DATA_FLOW_ANALYSIS_TYPES(InterMonoSolverTest, "inter-mono-solvertest", "Empty analysis. 
Just to see that the interprocedural monotone solver works") diff --git a/lib/Controller/AnalysisController.cpp b/lib/Controller/AnalysisController.cpp index fd77d2ec52..79b42977df 100644 --- a/lib/Controller/AnalysisController.cpp +++ b/lib/Controller/AnalysisController.cpp @@ -152,6 +152,9 @@ static void executeWholeProgram(AnalysisController::ControllerData &Data) { case DataFlowAnalysisType::IDEInstInteractionAnalysis: executeIDEIIA(Data); continue; + case DataFlowAnalysisType::IDEFeatureTaintAnalysis: + executeIDEFIIA(Data); + continue; case DataFlowAnalysisType::IntraMonoFullConstantPropagation: executeIntraMonoFullConstant(Data); continue; diff --git a/lib/Controller/AnalysisControllerInternal.h b/lib/Controller/AnalysisControllerInternal.h index 8a736e3a11..17dfbcdc45 100644 --- a/lib/Controller/AnalysisControllerInternal.h +++ b/lib/Controller/AnalysisControllerInternal.h @@ -53,6 +53,8 @@ executeIDESolverTest(AnalysisController::ControllerData &Data); LLVM_LIBRARY_VISIBILITY void executeIDEIIA(AnalysisController::ControllerData &Data); LLVM_LIBRARY_VISIBILITY void +executeIDEFIIA(AnalysisController::ControllerData &Data); +LLVM_LIBRARY_VISIBILITY void executeIntraMonoFullConstant(AnalysisController::ControllerData &Data); LLVM_LIBRARY_VISIBILITY void executeIntraMonoSolverTest(AnalysisController::ControllerData &Data); diff --git a/lib/Controller/AnalysisControllerXIDEFIIA.cpp b/lib/Controller/AnalysisControllerXIDEFIIA.cpp new file mode 100644 index 0000000000..12a6268b78 --- /dev/null +++ b/lib/Controller/AnalysisControllerXIDEFIIA.cpp @@ -0,0 +1,44 @@ +/****************************************************************************** + * Copyright (c) 2022 Martin Mory. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Martin Mory and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.h" + +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/Casting.h" + +#include "AnalysisControllerInternalIDE.h" + +using namespace psr; + +void controller::executeIDEFIIA(AnalysisController::ControllerData &Data) { + // use Phasar's instruction ids as testing labels + auto Generator = + [](std::variant + Current) -> std::set { + return std::visit( + [](const auto *InstOrGlob) -> std::set { + std::set Labels; + if (InstOrGlob->hasMetadata()) { + std::string Label = + llvm::cast( + InstOrGlob->getMetadata(PhasarConfig::MetaDataKind()) + ->getOperand(0)) + ->getString() + .str(); + Labels.insert(Label); + } + return Labels; + }, + Current); + }; + + executeIDEAnalysis(Data, Data.EntryPoints, + Generator); +} diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp index 930fd4969f..1c0d5cdbb2 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp @@ -80,11 +80,14 @@ auto IDEFeatureInteractionAnalysis::getNormalFlowFunction(n_t Curr, } if (const auto *Store = llvm::dyn_cast(Curr)) { - return lambdaFlow([Store, - PointerPTS = PT.getReachableAllocationSites( - Store->getPointerOperand(), true, Store), + auto PointerPTS = + PT.getReachableAllocationSites(Store->getPointerOperand(), true, Store); + container_type PointerRet(PointerPTS->begin(), PointerPTS->end()); + PointerRet.insert(Store->getPointerOperand()); + + return lambdaFlow([Store, PointerRet = std::move(PointerRet), GeneratesFact](d_t Src) -> container_type { - if (Store->getPointerOperand() == Src || PointerPTS->count(Src)) { + if 
(Store->getPointerOperand() == Src || PointerRet.count(Src)) { // Here, we are unsound! return {}; } @@ -93,16 +96,15 @@ auto IDEFeatureInteractionAnalysis::getNormalFlowFunction(n_t Curr, // y/Y now obtains its new value(s) from x/X // If a value is stored that holds we must generate all potential // memory locations the store might write to. - if (Store->getValueOperand() == Src) { - Facts.insert(Store->getPointerOperand()); - Facts.insert(PointerPTS->begin(), PointerPTS->end()); - } // ... or from zero, if we manually generate a fact here - if (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src)) { - Facts.insert(Store->getPointerOperand()); - Facts.insert(PointerPTS->begin(), PointerPTS->end()); + if (Store->getValueOperand() == Src || + (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src))) { + auto Facts = PointerRet; + Facts.insert(Src); + return Facts; } - return Facts; + + return {Src}; }); } @@ -278,12 +280,12 @@ struct IDEFeatureInteractionAnalysis::GenerateEF { llvm::report_fatal_error("Implemented in 'combine'"); } - friend bool operator==(const GenerateEF &L, const GenerateEF &R) { + constexpr friend bool operator==(const GenerateEF &L, const GenerateEF &R) { return L.Facts == R.Facts; } // NOLINTNEXTLINE(readability-identifier-naming) -- needed for ADL - friend llvm::hash_code hash_value(const GenerateEF &EF) { + constexpr friend llvm::hash_code hash_value(const GenerateEF &EF) { return hash_value(EF.Facts); } }; @@ -525,6 +527,70 @@ EdgeFunction addEF(l_t &&Facts, CacheT &AddEFCache) { return AddEFCache.createEdgeFunction(std::move(Facts)); } +template +std::pair +extractFacts(const EdgeFunction &EF) { + if (const auto *GenEF = llvm::dyn_cast(EF)) { + return {GenEF->Facts, nullptr}; + } + if (const auto *AddEF = llvm::dyn_cast(EF)) { + return {AddEF->Facts, nullptr}; + } + if (const auto *GenEF = llvm::dyn_cast(EF)) { + return {0, &GenEF->Facts}; + } + if (const auto *AddEF = llvm::dyn_cast(EF)) { + return {0, &AddEF->Facts}; + } + 
llvm_unreachable("All edge function types handled"); +} + +template +IDEFeatureTaintEdgeFact unionTaints(const EdgeFunction &FirstEF, + const EdgeFunction &OtherEF) { + + auto [FirstSmallFacts, FirstLargeFacts] = + extractFacts(FirstEF); + auto [OtherSmallFacts, OtherLargeFacts] = + extractFacts(OtherEF); + + if (FirstLargeFacts) { + IDEFeatureTaintEdgeFact Ret = *FirstLargeFacts; + if (OtherLargeFacts) { + Ret.unionWith(*OtherLargeFacts); + } else { + Ret.unionWith(OtherSmallFacts); + } + return Ret; + } + if (OtherLargeFacts) { + IDEFeatureTaintEdgeFact Ret = *OtherLargeFacts; + Ret.unionWith(FirstSmallFacts); + return Ret; + } + // Both Small + FirstSmallFacts |= OtherSmallFacts; + return FirstSmallFacts; +} + +EdgeFunction iiaDefaultJoinOrNull(const EdgeFunction &This, + const EdgeFunction &OtherFunction) { + if (llvm::isa>(OtherFunction) || + llvm::isa>(This)) { + return OtherFunction; + } + + // Due to our caching, we can do a reference-equals here + if (llvm::isa>(OtherFunction) || + OtherFunction.referenceEquals(This) || llvm::isa>(This)) { + return This; + } + if (llvm::isa>(OtherFunction)) { + return AllBottom{}; + } + return nullptr; +} + } // namespace //////////////////////////////////////////////////////////////////////////////// @@ -566,12 +632,13 @@ IDEFeatureInteractionAnalysis::combine(const EdgeFunction &FirstEF, return FirstEF; } - if (auto Default = defaultJoinOrNull(FirstEF, OtherEF)) { + if (auto Default = iiaDefaultJoinOrNull(FirstEF, OtherEF)) { return Default; } - auto ThisFacts = FirstEF.computeTarget(0); - ThisFacts.unionWith(OtherEF.computeTarget(0)); + // auto ThisFacts = FirstEF.computeTarget(0); + // ThisFacts.unionWith(OtherEF.computeTarget(0)); + auto ThisFacts = unionTaints(FirstEF, OtherEF); if (FirstEF.isConstant() && OtherEF.isConstant()) { return genEF(std::move(ThisFacts), GenEFCache); From a7adbab4abf248af9c288f9d5aa9f62a08a968fd Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 21 May 2024 08:59:52 +0200 Subject: 
[PATCH 14/33] rename --- ...nteractionAnalysis.h => IDEFeatureTaintAnalysis.h} | 11 ++++++++++- lib/Controller/AnalysisControllerXIDEFIIA.cpp | 2 +- ...actionAnalysis.cpp => IDEFeatureTaintAnalysis.cpp} | 2 +- .../DataFlow/IfdsIde/Problems/CMakeLists.txt | 2 +- ...alysisTest.cpp => IDEFeatureTaintAnalysisTest.cpp} | 2 +- 5 files changed, 14 insertions(+), 5 deletions(-) rename include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/{IDEFeatureInteractionAnalysis.h => IDEFeatureTaintAnalysis.h} (96%) rename lib/PhasarLLVM/DataFlow/IfdsIde/Problems/{IDEFeatureInteractionAnalysis.cpp => IDEFeatureTaintAnalysis.cpp} (99%) rename unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/{IDEFeatureInteractionAnalysisTest.cpp => IDEFeatureTaintAnalysisTest.cpp} (99%) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h similarity index 96% rename from include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.h rename to include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h index 839e414e63..9e728c1de8 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h @@ -275,8 +275,17 @@ class IDEFeatureInteractionAnalysis FeatureTaintGenerator(std::forward(SrcDetector), std::forward(EFGen))) {} + IDEFeatureInteractionAnalysis(const IDEFeatureInteractionAnalysis &) = delete; + IDEFeatureInteractionAnalysis & + operator=(const IDEFeatureInteractionAnalysis &) = delete; + + IDEFeatureInteractionAnalysis(IDEFeatureInteractionAnalysis &&) noexcept = + default; + IDEFeatureInteractionAnalysis & + operator=(IDEFeatureInteractionAnalysis &&) noexcept = delete; + // The EF Caches are incomplete, so move the dtor into the .cpp - ~IDEFeatureInteractionAnalysis(); + ~IDEFeatureInteractionAnalysis() override; 
////////////////////////////////////////////////////////////////////////////// /// Flow Functions diff --git a/lib/Controller/AnalysisControllerXIDEFIIA.cpp b/lib/Controller/AnalysisControllerXIDEFIIA.cpp index 12a6268b78..7b51a17913 100644 --- a/lib/Controller/AnalysisControllerXIDEFIIA.cpp +++ b/lib/Controller/AnalysisControllerXIDEFIIA.cpp @@ -7,7 +7,7 @@ * Martin Mory and others *****************************************************************************/ -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Metadata.h" diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp similarity index 99% rename from lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp rename to lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index 1c0d5cdbb2..06f40bfa35 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -1,4 +1,4 @@ -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h" #include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt index 29738963c8..7d5a903495 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt @@ -8,7 +8,7 @@ set(IfdsIdeProblemSources IDEGeneralizedLCATest.cpp IDEExtendedTaintAnalysisTest.cpp IDETSAnalysisFileIOTest.cpp - 
IDEFeatureInteractionAnalysisTest.cpp + IDEFeatureTaintAnalysisTest.cpp ) if(PHASAR_BUILD_OPENSSL_TS_UNITTESTS) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp similarity index 99% rename from unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysisTest.cpp rename to unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp index 28656abc0c..6988940833 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -1,4 +1,4 @@ -#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureInteractionAnalysis.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h" #include "phasar/Config/Configuration.h" #include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" From 0e1ad6953efb651989db04d8fdd8b8464ddf158e Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 11 Jun 2024 09:12:31 +0200 Subject: [PATCH 15/33] rename FIIA Domain --- .../DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h index 9e728c1de8..7e05f62d2b 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h @@ -162,7 +162,7 @@ template <> struct JoinLatticeTraits { } }; -struct IDEFeatureInteractionAnalysisDomain : LLVMAnalysisDomainDefault { +struct IDEFeatureTaintAnalysisDomain : LLVMAnalysisDomainDefault { using l_t = IDEFeatureTaintEdgeFact; }; @@ -247,7 +247,7 @@ class FeatureTaintGenerator { }; class IDEFeatureInteractionAnalysis - : 
public IDETabulationProblem { + : public IDETabulationProblem { public: IDEFeatureInteractionAnalysis(const LLVMProjectIRDB *IRDB, From 4753f6664d96eca629be58a140299ec6737c6c83 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 11 Jun 2024 09:16:28 +0200 Subject: [PATCH 16/33] rename --- .../Problems/IDEFeatureTaintAnalysis.h | 43 ++++++------ lib/Controller/AnalysisControllerXIDEFIIA.cpp | 4 +- .../Problems/IDEFeatureTaintAnalysis.cpp | 68 +++++++++---------- .../Problems/IDEFeatureTaintAnalysisTest.cpp | 2 +- 4 files changed, 56 insertions(+), 61 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h index 7e05f62d2b..84b2d970f0 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h @@ -246,46 +246,41 @@ class FeatureTaintGenerator { PrinterFn Printer; }; -class IDEFeatureInteractionAnalysis +class IDEFeatureTaintAnalysis : public IDETabulationProblem { public: - IDEFeatureInteractionAnalysis(const LLVMProjectIRDB *IRDB, - LLVMAliasInfoRef PT, - std::vector EntryPoints, - FeatureTaintGenerator &&TaintGen); + IDEFeatureTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, + std::vector EntryPoints, + FeatureTaintGenerator &&TaintGen); template - IDEFeatureInteractionAnalysis(const LLVMProjectIRDB *IRDB, - LLVMAliasInfoRef PT, - std::vector EntryPoints, - EdgeFactGenerator &&EFGen) - : IDEFeatureInteractionAnalysis( + IDEFeatureTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, + std::vector EntryPoints, + EdgeFactGenerator &&EFGen) + : IDEFeatureTaintAnalysis( IRDB, PT, std::move(EntryPoints), FeatureTaintGenerator(std::forward(EFGen))) {} template - IDEFeatureInteractionAnalysis(const LLVMProjectIRDB *IRDB, - LLVMAliasInfoRef PT, - std::vector EntryPoints, - SourceDetector &&SrcDetector, - 
EdgeFactGenerator &&EFGen) - : IDEFeatureInteractionAnalysis( + IDEFeatureTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, + std::vector EntryPoints, + SourceDetector &&SrcDetector, + EdgeFactGenerator &&EFGen) + : IDEFeatureTaintAnalysis( IRDB, PT, std::move(EntryPoints), FeatureTaintGenerator(std::forward(SrcDetector), std::forward(EFGen))) {} - IDEFeatureInteractionAnalysis(const IDEFeatureInteractionAnalysis &) = delete; - IDEFeatureInteractionAnalysis & - operator=(const IDEFeatureInteractionAnalysis &) = delete; + IDEFeatureTaintAnalysis(const IDEFeatureTaintAnalysis &) = delete; + IDEFeatureTaintAnalysis &operator=(const IDEFeatureTaintAnalysis &) = delete; - IDEFeatureInteractionAnalysis(IDEFeatureInteractionAnalysis &&) noexcept = - default; - IDEFeatureInteractionAnalysis & - operator=(IDEFeatureInteractionAnalysis &&) noexcept = delete; + IDEFeatureTaintAnalysis(IDEFeatureTaintAnalysis &&) noexcept = default; + IDEFeatureTaintAnalysis & + operator=(IDEFeatureTaintAnalysis &&) noexcept = delete; // The EF Caches are incomplete, so move the dtor into the .cpp - ~IDEFeatureInteractionAnalysis() override; + ~IDEFeatureTaintAnalysis() override; ////////////////////////////////////////////////////////////////////////////// /// Flow Functions diff --git a/lib/Controller/AnalysisControllerXIDEFIIA.cpp b/lib/Controller/AnalysisControllerXIDEFIIA.cpp index 7b51a17913..728083d7c0 100644 --- a/lib/Controller/AnalysisControllerXIDEFIIA.cpp +++ b/lib/Controller/AnalysisControllerXIDEFIIA.cpp @@ -39,6 +39,6 @@ void controller::executeIDEFIIA(AnalysisController::ControllerData &Data) { Current); }; - executeIDEAnalysis(Data, Data.EntryPoints, - Generator); + executeIDEAnalysis(Data, Data.EntryPoints, + Generator); } diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index 06f40bfa35..f91bcae822 100644 --- 
a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -27,17 +27,17 @@ using namespace psr; -using l_t = IDEFeatureInteractionAnalysisDomain::l_t; -using d_t = IDEFeatureInteractionAnalysisDomain::d_t; +using l_t = IDEFeatureTaintAnalysisDomain::l_t; +using d_t = IDEFeatureTaintAnalysisDomain::d_t; -IDEFeatureInteractionAnalysis::IDEFeatureInteractionAnalysis( +IDEFeatureTaintAnalysis::IDEFeatureTaintAnalysis( const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, std::vector EntryPoints, FeatureTaintGenerator &&TaintGen) - : IDETabulationProblem( + : IDETabulationProblem( IRDB, std::move(EntryPoints), LLVMZeroValue::getInstance()), TaintGen(std::move(TaintGen)), PT(PT) {} -IDEFeatureInteractionAnalysis::~IDEFeatureInteractionAnalysis() = default; +IDEFeatureTaintAnalysis::~IDEFeatureTaintAnalysis() = default; std::string psr::LToString(const IDEFeatureTaintEdgeFact &EdgeFact) { std::string Ret; @@ -48,8 +48,7 @@ std::string psr::LToString(const IDEFeatureTaintEdgeFact &EdgeFact) { return Ret; } -auto IDEFeatureInteractionAnalysis::getNormalFlowFunction(n_t Curr, - n_t /* Succ */) +auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) -> FlowFunctionPtrType { bool GeneratesFact = TaintGen.isSource(Curr); @@ -132,8 +131,7 @@ auto IDEFeatureInteractionAnalysis::getNormalFlowFunction(n_t Curr, }); } -auto IDEFeatureInteractionAnalysis::getCallFlowFunction(n_t CallSite, - f_t DestFun) +auto IDEFeatureTaintAnalysis::getCallFlowFunction(n_t CallSite, f_t DestFun) -> FlowFunctionPtrType { if (DestFun->isDeclaration()) { @@ -170,10 +168,10 @@ auto IDEFeatureInteractionAnalysis::getCallFlowFunction(n_t CallSite, return MapFactsToCalleeFF; } -auto IDEFeatureInteractionAnalysis::getRetFlowFunction(n_t CallSite, - f_t /*CalleeFun*/, - n_t ExitInst, - n_t /* RetSite */) +auto IDEFeatureTaintAnalysis::getRetFlowFunction(n_t CallSite, + f_t /*CalleeFun*/, + n_t 
ExitInst, + n_t /* RetSite */) -> FlowFunctionPtrType { // Map return value back to the caller. If pointer parameters hold at the // end of a callee function generate all of those in the caller context. @@ -198,7 +196,7 @@ auto IDEFeatureInteractionAnalysis::getRetFlowFunction(n_t CallSite, }); } -auto IDEFeatureInteractionAnalysis::getCallToRetFlowFunction( +auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( n_t CallSite, n_t /* RetSite */, llvm::ArrayRef Callees) -> FlowFunctionPtrType { @@ -229,8 +227,8 @@ auto IDEFeatureInteractionAnalysis::getCallToRetFlowFunction( return Mapper; } -struct IDEFeatureInteractionAnalysis::AddFactsEF { - using l_t = IDEFeatureInteractionAnalysisDomain::l_t; +struct IDEFeatureTaintAnalysis::AddFactsEF { + using l_t = IDEFeatureTaintAnalysisDomain::l_t; IDEFeatureTaintEdgeFact Facts; @@ -259,8 +257,8 @@ struct IDEFeatureInteractionAnalysis::AddFactsEF { } }; -struct IDEFeatureInteractionAnalysis::GenerateEF { - using l_t = IDEFeatureInteractionAnalysisDomain::l_t; +struct IDEFeatureTaintAnalysis::GenerateEF { + using l_t = IDEFeatureTaintAnalysisDomain::l_t; IDEFeatureTaintEdgeFact Facts; @@ -292,7 +290,7 @@ struct IDEFeatureInteractionAnalysis::GenerateEF { namespace { struct AddSmallFactsEF { - using l_t = IDEFeatureInteractionAnalysisDomain::l_t; + using l_t = IDEFeatureTaintAnalysisDomain::l_t; uintptr_t Facts{}; @@ -322,7 +320,7 @@ struct AddSmallFactsEF { }; struct GenerateSmallEF { - using l_t = IDEFeatureInteractionAnalysisDomain::l_t; + using l_t = IDEFeatureTaintAnalysisDomain::l_t; uintptr_t Facts{}; @@ -596,8 +594,8 @@ EdgeFunction iiaDefaultJoinOrNull(const EdgeFunction &This, //////////////////////////////////////////////////////////////////////////////// EdgeFunction -IDEFeatureInteractionAnalysis::extend(const EdgeFunction &FirstEF, - const EdgeFunction &SecondEF) { +IDEFeatureTaintAnalysis::extend(const EdgeFunction &FirstEF, + const EdgeFunction &SecondEF) { auto Ret = [&] { if (auto Default = 
defaultComposeOrNull(FirstEF, SecondEF)) { // llvm::errs() << "defaultComposeOrNull>>\n"; @@ -620,8 +618,8 @@ IDEFeatureInteractionAnalysis::extend(const EdgeFunction &FirstEF, return Ret; } EdgeFunction -IDEFeatureInteractionAnalysis::combine(const EdgeFunction &FirstEF, - const EdgeFunction &OtherEF) { +IDEFeatureTaintAnalysis::combine(const EdgeFunction &FirstEF, + const EdgeFunction &OtherEF) { auto Ret = [&] { /// XXX: Here, we underapproximate joins with EdgeIdentity if (llvm::isa>(FirstEF)) { @@ -653,8 +651,10 @@ IDEFeatureInteractionAnalysis::combine(const EdgeFunction &FirstEF, return Ret; } -auto IDEFeatureInteractionAnalysis::getNormalEdgeFunction( - n_t Curr, d_t CurrNode, n_t /* Succ */, d_t SuccNode) -> EdgeFunction { +auto IDEFeatureTaintAnalysis::getNormalEdgeFunction(n_t Curr, d_t CurrNode, + n_t /* Succ */, + d_t SuccNode) + -> EdgeFunction { if (isZeroValue(SuccNode) || CurrNode == SuccNode) { // We don't want to propagate any facts on zero @@ -687,8 +687,9 @@ auto IDEFeatureInteractionAnalysis::getNormalEdgeFunction( return EdgeIdentity{}; } -auto IDEFeatureInteractionAnalysis::getCallEdgeFunction( - n_t CallSite, d_t SrcNode, f_t /*DestinationFunction*/, d_t DestNode) +auto IDEFeatureTaintAnalysis::getCallEdgeFunction(n_t CallSite, d_t SrcNode, + f_t /*DestinationFunction*/, + d_t DestNode) -> EdgeFunction { if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { // Generate user edge-facts from zero @@ -698,7 +699,7 @@ auto IDEFeatureInteractionAnalysis::getCallEdgeFunction( return EdgeIdentity{}; } -auto IDEFeatureInteractionAnalysis::getReturnEdgeFunction( +auto IDEFeatureTaintAnalysis::getReturnEdgeFunction( n_t CallSite, f_t /*CalleeFunction*/, n_t ExitStmt, d_t ExitNode, n_t /*RetSite*/, d_t RetNode) -> EdgeFunction { if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { @@ -709,7 +710,7 @@ auto IDEFeatureInteractionAnalysis::getReturnEdgeFunction( return EdgeIdentity{}; } -auto IDEFeatureInteractionAnalysis::getCallToRetEdgeFunction( +auto 
IDEFeatureTaintAnalysis::getCallToRetEdgeFunction( n_t CallSite, d_t CallNode, n_t /*RetSite*/, d_t RetSiteNode, llvm::ArrayRef /*Callees*/) -> EdgeFunction { if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { @@ -742,8 +743,7 @@ auto IDEFeatureInteractionAnalysis::getCallToRetEdgeFunction( return EdgeIdentity{}; } -auto IDEFeatureInteractionAnalysis::initialSeeds() - -> InitialSeeds { +auto IDEFeatureTaintAnalysis::initialSeeds() -> InitialSeeds { InitialSeeds Seeds; LLVMBasedCFG CFG; @@ -778,11 +778,11 @@ auto IDEFeatureInteractionAnalysis::initialSeeds() return Seeds; } -bool IDEFeatureInteractionAnalysis::isZeroValue(d_t FlowFact) const noexcept { +bool IDEFeatureTaintAnalysis::isZeroValue(d_t FlowFact) const noexcept { return LLVMZeroValue::isLLVMZeroValue(FlowFact); } -void IDEFeatureInteractionAnalysis::emitTextReport( +void IDEFeatureTaintAnalysis::emitTextReport( const SolverResults &SR, llvm::raw_ostream &OS) { OS << "\n====================== IDE-Inst-Interaction-Analysis Report " "======================\n"; diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp index 6988940833..ff359389bb 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -87,7 +87,7 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { Current); }; assert(HA); - auto IIAProblem = createAnalysisProblem( + auto IIAProblem = createAnalysisProblem( *HA, EntryPoints, Generator); // if (PrintDump) { From a3b9b67c5fa2b79b817e2f479f38cabfa233d074 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 11 Jun 2024 16:03:29 +0200 Subject: [PATCH 17/33] Make PathSensitivityManagerMixin more self-contained --- .../DataFlow/PathSensitivity/PathSensitivityManagerMixin.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff 
--git a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h index 6a986d99c7..c92c35aae3 100644 --- a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h +++ b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h @@ -26,6 +26,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/raw_ostream.h" #include @@ -33,10 +34,6 @@ #include #include -namespace llvm { -class DbgInfoIntrinsic; -} // namespace llvm - namespace psr { template class PathSensitivityManagerMixin { From 6e883d7869ae677c27d6fbba2555e2b28b2108c3 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 19 Jun 2024 13:47:32 +0200 Subject: [PATCH 18/33] Fix top edge facts + further debugging --- .../DataFlow/IfdsIde/LLVMSolverResults.h | 1 + .../Problems/IDEFeatureTaintAnalysis.h | 21 ++- .../Problems/IDEFeatureTaintAnalysis.cpp | 124 ++++++++++++++++-- .../inst_interaction/CMakeLists.txt | 2 + .../inst_interaction/rvo_02.cpp | 13 ++ .../inst_interaction/rvo_03.cpp | 54 ++++++++ .../Problems/IDEFeatureTaintAnalysisTest.cpp | 104 ++++++++++++++- 7 files changed, 300 insertions(+), 19 deletions(-) create mode 100644 test/llvm_test_code/inst_interaction/rvo_02.cpp create mode 100644 test/llvm_test_code/inst_interaction/rvo_03.cpp diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h index 4d17597b70..b4f7676f6f 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h @@ -11,6 +11,7 @@ #define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_LLVMSOLVERRESULTS_H #include "phasar/DataFlow/IfdsIde/SolverResults.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/JoinLattice.h" #include 
"phasar/Utils/Logger.h" diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h index 84b2d970f0..3e174cacaf 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h @@ -93,7 +93,7 @@ struct IDEFeatureTaintEdgeFact { } friend bool operator==(const IDEFeatureTaintEdgeFact &Lhs, - const IDEFeatureTaintEdgeFact &Rhs) { + const IDEFeatureTaintEdgeFact &Rhs) noexcept { bool LeftEmpty = Lhs.Taints.none(); bool RightEmpty = Rhs.Taints.none(); if (LeftEmpty || RightEmpty) { @@ -118,17 +118,28 @@ struct IDEFeatureTaintEdgeFact { [](auto Word) { return Word == 0; }); } + friend bool operator!=(const IDEFeatureTaintEdgeFact &Lhs, + const IDEFeatureTaintEdgeFact &Rhs) noexcept { + return !(Lhs == Rhs); + } + template [[nodiscard]] std::string str() const { auto BV = BitVectorSet::fromBits(Taints); return LToString(BV); } template [[nodiscard]] auto toBVSet() const { + if (isTop()) { + return BitVectorSet(); + } return BitVectorSet::fromBits(Taints); } template [[nodiscard]] auto toSet() const { std::set Ret; + if (isTop()) { + return Ret; + } for (const auto &Elem : this->template toBVSet()) { Ret.insert(Elem); @@ -295,8 +306,8 @@ class IDEFeatureTaintAnalysis FlowFunctionPtrType getCallToRetFlowFunction(n_t CallSite, n_t RetSite, llvm::ArrayRef Callees) override; - // FlowFunctionPtrType getSummaryFlowFunction(n_t CallSite, - // f_t DestFun) override; + FlowFunctionPtrType getSummaryFlowFunction(n_t CallSite, + f_t DestFun) override; ////////////////////////////////////////////////////////////////////////////// /// Edge Functions @@ -318,8 +329,8 @@ class IDEFeatureTaintAnalysis d_t RetSiteNode, llvm::ArrayRef Callees) override; - // EdgeFunction getSummaryEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, - // d_t SuccNode) override; + EdgeFunction 
getSummaryEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, + d_t SuccNode) override; ////////////////////////////////////////////////////////////////////////////// /// Misc diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index f91bcae822..2054ce6eba 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -16,7 +16,9 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/IR/Constant.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -48,6 +50,56 @@ std::string psr::LToString(const IDEFeatureTaintEdgeFact &EdgeFact) { return Ret; } +static bool isMustAlias(const llvm::Value *Val1, + const llvm::Value *Val2) noexcept { + + const auto *Base1 = Val1->stripPointerCastsAndAliases(); + const auto *Base2 = Val2->stripPointerCastsAndAliases(); + if (Base1 == Base2) { + return true; + } + + // Note: We are not field-sensitive + + const auto *Load1 = llvm::dyn_cast(Base1); + if (Load1 && + Load1->getPointerOperand()->stripPointerCastsAndAliases() == Base2) { + return true; + } + + const auto *Load2 = llvm::dyn_cast(Base2); + if (Load2 && + Load2->getPointerOperand()->stripPointerCastsAndAliases() == Base1) { + return true; + } + if (Load1 && Load2 && + Load1->getPointerOperand() == Load2->getPointerOperand()) { + return true; + } + + // TODO: handle more cases + + return false; +} + +static bool canKillPointerOp(const llvm::Value *PointerOp, + const llvm::Value *Src, + const IDEFeatureTaintAnalysis::container_type + &PointerOpMayAliases) noexcept { + if (PointerOp == Src || isMustAlias(PointerOp, Src)) { + return true; + } + + // For precision, we may want to kill some 
facts unsoundly + + if (llvm::isa(Src) || + llvm::isa(PointerOp)) { + return PointerOpMayAliases.count(Src); + } + + return false; +} + auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) -> FlowFunctionPtrType { bool GeneratesFact = TaintGen.isSource(Curr); @@ -86,8 +138,7 @@ auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) return lambdaFlow([Store, PointerRet = std::move(PointerRet), GeneratesFact](d_t Src) -> container_type { - if (Store->getPointerOperand() == Src || PointerRet.count(Src)) { - // Here, we are unsound! + if (canKillPointerOp(Store->getPointerOperand(), Src, PointerRet)) { return {}; } container_type Facts; @@ -97,6 +148,7 @@ auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) // memory locations the store might write to. // ... or from zero, if we manually generate a fact here if (Store->getValueOperand() == Src || + Store->getValueOperand()->stripPointerCastsAndAliases() == Src || (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src))) { auto Facts = PointerRet; Facts.insert(Src); @@ -144,7 +196,8 @@ auto IDEFeatureTaintAnalysis::getCallFlowFunction(n_t CallSite, f_t DestFun) // Map actual to formal parameters. 
auto MapFactsToCalleeFF = mapFactsToCallee( CS, DestFun, [CS](const llvm::Value *ActualArg, ByConstRef Src) { - if (d_t(ActualArg) != Src) { + if (ActualArg != Src && + ActualArg->stripPointerCastsAndAliases() != Src) { return false; } @@ -227,6 +280,43 @@ auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( return Mapper; } +auto IDEFeatureTaintAnalysis::getSummaryFlowFunction(n_t CallSite, f_t DestFun) + -> FlowFunctionPtrType { + if (const auto *MemTrn = llvm::dyn_cast(CallSite)) { + + bool GeneratesFact = TaintGen.isSource(CallSite); + + auto PointerPTS = + PT.getReachableAllocationSites(MemTrn->getDest(), true, MemTrn); + container_type PointerRet(PointerPTS->begin(), PointerPTS->end()); + PointerRet.insert(MemTrn->getDest()); + return lambdaFlow([MemTrn, PointerRet = std::move(PointerRet), + GeneratesFact](d_t Src) -> container_type { + if (canKillPointerOp(MemTrn->getDest(), Src, PointerRet)) { + return {}; + } + container_type Facts; + Facts.insert(Src); + // y/Y now obtains its new value(s) from x/X + // If a value is stored that holds we must generate all potential + // memory locations the store might write to. + // ... or from zero, if we manually generate a fact here + if (MemTrn->getSource() == Src || + MemTrn->getSource()->stripInBoundsConstantOffsets() == Src || + (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src))) { + + auto Facts = PointerRet; + Facts.insert(Src); + return Facts; + } + + return {Src}; + }); + } + + return nullptr; +} + struct IDEFeatureTaintAnalysis::AddFactsEF { using l_t = IDEFeatureTaintAnalysisDomain::l_t; @@ -671,16 +761,16 @@ auto IDEFeatureTaintAnalysis::getNormalEdgeFunction(n_t Curr, d_t CurrNode, } // Overrides at store instructions - if (const auto *Store = llvm::dyn_cast(Curr)) { - if (CurrNode == Store->getValueOperand()) { - // Store tainted value + // if (const auto *Store = llvm::dyn_cast(Curr)) { + // if (CurrNode == Store->getValueOperand()) { + // // Store tainted value - // propagate facts unchanged. 
User edge-facts are generated from zero. + // // propagate facts unchanged. User edge-facts are generated from zero. - // llvm::errs() << "Store Identity\n"; - return EdgeIdentity{}; - } - } + // // llvm::errs() << "Store Identity\n"; + // return EdgeIdentity{}; + // } + // } // llvm::errs() << "Fallback Identity\n"; // Otherwise stick to identity. @@ -743,6 +833,18 @@ auto IDEFeatureTaintAnalysis::getCallToRetEdgeFunction( return EdgeIdentity{}; } +auto IDEFeatureTaintAnalysis::getSummaryEdgeFunction(n_t Curr, d_t CurrNode, + n_t Succ, d_t SuccNode) + -> EdgeFunction { + if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { + // Generate user edge-facts from zero + + return genEF(TaintGen.getGeneratedTaintsAt(Curr), GenEFCache); + } + + return EdgeIdentity{}; +} + auto IDEFeatureTaintAnalysis::initialSeeds() -> InitialSeeds { InitialSeeds Seeds; diff --git a/test/llvm_test_code/inst_interaction/CMakeLists.txt b/test/llvm_test_code/inst_interaction/CMakeLists.txt index ecd10f3a7c..aaff9adacb 100644 --- a/test/llvm_test_code/inst_interaction/CMakeLists.txt +++ b/test/llvm_test_code/inst_interaction/CMakeLists.txt @@ -27,6 +27,8 @@ set(Sources KillTest_02.cpp return_01.cpp rvo_01.cpp + rvo_02.cpp + rvo_03.cpp struct_01.cpp struct_02.cpp ) diff --git a/test/llvm_test_code/inst_interaction/rvo_02.cpp b/test/llvm_test_code/inst_interaction/rvo_02.cpp new file mode 100644 index 0000000000..113e3bfe93 --- /dev/null +++ b/test/llvm_test_code/inst_interaction/rvo_02.cpp @@ -0,0 +1,13 @@ +#include + +int g = 0; +void functionWithoutInput() { g = 42; } +std::string createString() { return "My String"; } + +int main() { + std::string str; + functionWithoutInput(); + str = "1234"; + str = createString(); + return str.size(); +} diff --git a/test/llvm_test_code/inst_interaction/rvo_03.cpp b/test/llvm_test_code/inst_interaction/rvo_03.cpp new file mode 100644 index 0000000000..d9390f0755 --- /dev/null +++ b/test/llvm_test_code/inst_interaction/rvo_03.cpp @@ -0,0 +1,54 @@ + 
+#include +#include +#include + +class String { +public: + String() noexcept = default; + + String(const char *Data) : Length(strlen(Data)) { + auto *Dat = new char[Length]; + this->Data = Dat; + memcpy(Dat, Data, Length); + } + ~String() { delete[] Data; } + + String(String &&Other) noexcept : Data(Other.Data), Length(Other.Length) { + Other.Data = nullptr; + Other.Length = 0; + } + + void swap(String &Other) noexcept { + const auto *Dat = Data; + Data = Other.Data; + Other.Data = Dat; + + auto Len = Length; + Length = Other.Length; + Other.Length = Len; + } + + String &operator=(String &&Other) noexcept { + String(std::move(Other)).swap(*this); + return *this; + } + + [[nodiscard]] size_t size() const noexcept { return Length; } + +private: + const char *Data{}; + size_t Length{}; +}; + +int g = 0; +void functionWithoutInput() { g = 42; } +String createString() { return "My String"; } + +int main() { + String Str; + functionWithoutInput(); + Str = "1234"; + Str = createString(); + return Str.size(); +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp index ff359389bb..c14da6c603 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -4,6 +4,7 @@ #include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" #include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" #include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h" #include "phasar/PhasarLLVM/HelperAnalyses.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" #include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" @@ -13,6 +14,8 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/Twine.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "TestConfig.h" #include "gtest/gtest.h" @@ 
-48,6 +51,11 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { HelperAnalysisConfig{}.withCGType(CallGraphAnalysisType::CHA)); IRDB = &HA->getProjectIRDB(); + for (const auto &Glob : IRDB->getModule()->globals()) { + BitVectorSet BV; + BV.insert(getMetaDataID(&Glob)); + } + // Initialze IDs for (const auto *Inst : IRDB->getAllInstructions()) { BitVectorSet BV; @@ -97,8 +105,9 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { IDESolver IIASolver(IIAProblem, &HA->getICFG()); IIASolver.solve(); if (PrintDump) { - IRDB->emitPreprocessedIR(llvm::outs()); - IIASolver.dumpResults(); + // IRDB->emitPreprocessedIR(llvm::outs()); + // IIASolver.dumpResults(); + printDump(HA->getProjectIRDB(), IIASolver.getSolverResults()); } // do the comparison for (const auto &[FunName, SrcLine, VarName, LatticeVal] : GroundTruth) { @@ -130,6 +139,62 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { void TearDown() override {} + // See vara::PhasarTaintAnalysis::taintsForInst + [[nodiscard]] inline std::set + taintsForInst(const llvm::Instruction *Inst, + SolverResults + SR) { + + if (const auto *Ret = llvm::dyn_cast(Inst)) { + if (Ret->getNumOperands() == 0) { + return {}; + } + } else if (llvm::isa(Inst)) { + return {}; + } + + std::set AggregatedTaints; + + if (Inst->getType()->isVoidTy()) { // For void types, we need to look what + // taints flow into the inst + + // auto Results = SR.resultsAt(Inst); + assert(Inst->getNumOperands() >= 1 && + "Found case without first operand."); + AggregatedTaints = + SR.resultAt(Inst, Inst->getOperand(0)).toSet(); + + } else { + auto Results = SR.resultsAtInLLVMSSA(Inst); + auto SearchPosTaints = Results.find(Inst); + if (SearchPosTaints != Results.end()) { + AggregatedTaints = SearchPosTaints->second.toSet(); + } + } + + // additionalStaticTaints + AggregatedTaints.insert(getMetaDataID(Inst)); + + return AggregatedTaints; + } + + void printDump(const LLVMProjectIRDB &IRDB, + SolverResults + SR) { + 
const llvm::Function *CurrFun = nullptr; + for (const auto *Inst : IRDB.getAllInstructions()) { + if (CurrFun != Inst->getFunction()) { + CurrFun = Inst->getFunction(); + llvm::outs() << "\n=================== '" << CurrFun->getName() + << "' ===================\n"; + } + llvm::outs() << " N: " << llvmIRToString(Inst) << '\n'; + llvm::outs() << " D: " << printSet(taintsForInst(Inst, SR)) << "\n\n"; + } + } + }; // Test Fixture TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_01) { @@ -323,7 +388,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_01) { GroundTruth.emplace("main", 9, "retval", std::set{"3"}); GroundTruth.emplace("main", 9, "i", std::set{"7"}); GroundTruth.emplace("main", 9, "j", std::set{"0", "5", "6"}); - doAnalysisAndCompareResults("global_01_cpp.ll", {"main"}, GroundTruth, false); + doAnalysisAndCompareResults("global_01_cpp.ll", {"main"}, GroundTruth, true); } TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_02) { @@ -409,6 +474,39 @@ PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_01) { std::set{"66", "9", "72", "73", "71"}); doAnalysisAndCompareResults("rvo_01_cpp.ll", {"main"}, GroundTruth, false); }) + +PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_02) { + // GTEST_SKIP() << "This test heavily depends on the used stdlib version. + // TODO: " + // "add a better one"; + + std::set GroundTruth; + // GroundTruth.emplace("main", 16, "retval", std::set{"75", + // "76"}); GroundTruth.emplace("main", 16, "str", + // std::set{"70", "65", "72", "74", "77"}); + // GroundTruth.emplace("main", 16, "ref.tmp", + // std::set{"66", "9", "72", "73", "71"}); + doAnalysisAndCompareResults("rvo_02_cpp.ll", {"main"}, GroundTruth, true); + + ASSERT_FALSE(true) << "Add GroundTruth!"; +}) + +PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_03) { + // GTEST_SKIP() << "This test heavily depends on the used stdlib version. 
+ // TODO: " + // "add a better one"; + + std::set GroundTruth; + // GroundTruth.emplace("main", 16, "retval", std::set{"75", + // "76"}); GroundTruth.emplace("main", 16, "str", + // std::set{"70", "65", "72", "74", "77"}); + // GroundTruth.emplace("main", 16, "ref.tmp", + // std::set{"66", "9", "72", "73", "71"}); + doAnalysisAndCompareResults("rvo_03_cpp.ll", {"main"}, GroundTruth, true); + + ASSERT_FALSE(true) << "Add GroundTruth!"; +}) + } // namespace int main(int Argc, char **Argv) { From d940b7ed00ee0d1d6ef2a0e0acb270fdf105cd7a Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 9 Aug 2024 09:34:36 +0200 Subject: [PATCH 19/33] IMprove alias handling in store ff --- .../Problems/IDEFeatureTaintAnalysis.cpp | 49 +++++++++++++------ 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index 2054ce6eba..8a26a1bcaf 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -136,26 +136,45 @@ auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) container_type PointerRet(PointerPTS->begin(), PointerPTS->end()); PointerRet.insert(Store->getPointerOperand()); + auto ValuePTS = + PT.getReachableAllocationSites(Store->getValueOperand(), true, Store); + + // llvm::errs() << "At " << llvmIRToString(Store) << ":\n"; + // llvm::errs() << "> PointerRet:\n"; + // for (const auto *Ptr : PointerRet) { + // llvm::errs() << "> " << llvmIRToString(Ptr) << '\n'; + // } + // llvm::errs() << "> ValuePTS:\n"; + // for (const auto *Ptr : *ValuePTS) { + // llvm::errs() << "> " << llvmIRToString(Ptr) << '\n'; + // } + return lambdaFlow([Store, PointerRet = std::move(PointerRet), + ValuePTS = std::move(ValuePTS), GeneratesFact](d_t Src) -> container_type { - if (canKillPointerOp(Store->getPointerOperand(), Src, 
PointerRet)) { + if (Store->getPointerOperand() == Src || + (PointerRet.count(Src) && + canKillPointerOp(Store->getPointerOperand(), Src, PointerRet))) { + // llvm::errs() << "Kill pointer op " << llvmIRToShortString(Src) << " + // at " + // << llvmIRToString(Store) << '\n'; return {}; } - container_type Facts; - Facts.insert(Src); - // y/Y now obtains its new value(s) from x/X - // If a value is stored that holds we must generate all potential - // memory locations the store might write to. - // ... or from zero, if we manually generate a fact here - if (Store->getValueOperand() == Src || - Store->getValueOperand()->stripPointerCastsAndAliases() == Src || - (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src))) { - auto Facts = PointerRet; - Facts.insert(Src); - return Facts; - } + container_type Facts = [&] { + // y/Y now obtains its new value(s) from x/X + // If a value is stored that holds we must generate all potential + // memory locations the store might write to. + // ... or from zero, if we manually generate a fact here + if (Store->getValueOperand() == Src || + (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src)) || + ValuePTS->count(Src)) { + return PointerRet; + } + return container_type(); + }(); - return {Src}; + Facts.insert(Src); + return Facts; }); } From 7a05228e42fd5ea40847d02c2fdbc31afa4b30e3 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 9 Aug 2024 10:52:15 +0200 Subject: [PATCH 20/33] Handle memset + add one more rvo test --- .../Problems/IDEFeatureTaintAnalysis.cpp | 157 +++++++++--------- .../inst_interaction/CMakeLists.txt | 1 + .../inst_interaction/rvo_04.cpp | 19 +++ .../Problems/IDEFeatureTaintAnalysisTest.cpp | 28 +++- 4 files changed, 118 insertions(+), 87 deletions(-) create mode 100644 test/llvm_test_code/inst_interaction/rvo_04.cpp diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index 8a26a1bcaf..6d1cf2d4a7 100644 
--- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -8,13 +8,11 @@ #include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" #include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" -#include "phasar/Utils/BitVectorSet.h" #include "phasar/Utils/ByRef.h" #include "phasar/Utils/Printer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/IR/Constant.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -100,6 +98,64 @@ static bool canKillPointerOp(const llvm::Value *PointerOp, return false; } +static auto getStoreFF(bool GeneratesFact, LLVMAliasInfoRef PT, + const llvm::Instruction *Inst, const llvm::Value *Dest, + const llvm::Value *Value) { + + using container_type = IDEFeatureTaintAnalysis::container_type; + + auto PointerPTS = PT.getReachableAllocationSites(Dest, true, Inst); + container_type PointerRet(PointerPTS->begin(), PointerPTS->end()); + PointerRet.insert(Dest); + + auto ValuePTS = PT.getReachableAllocationSites(Value, true, Inst); + + // llvm::errs() << "At " << llvmIRToString(Inst) << ":\n"; + // llvm::errs() << "> PointerRet:\n"; + // for (const auto *Ptr : PointerRet) { + // llvm::errs() << "> " << llvmIRToString(Ptr) << '\n'; + // } + // llvm::errs() << "> ValuePTS:\n"; + // for (const auto *Ptr : *ValuePTS) { + // llvm::errs() << "> " << llvmIRToString(Ptr) << '\n'; + // } + + return FlowFunctionTemplates::lambdaFlow( + [Dest, Value, PointerRet = std::move(PointerRet), + ValuePTS = std::move(ValuePTS), + GeneratesFact](d_t Src) -> container_type { + if (Dest == Src || (PointerRet.count(Src) && + canKillPointerOp(Dest, Src, PointerRet))) { + return {}; + } + container_type Facts = [&] { + // y/Y now obtains its new value(s) from x/X + // If a value is stored that holds we must generate all potential + // memory 
locations the store might write to. + // ... or from zero, if we manually generate a fact here + if (Value == Src || + (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src)) || + ValuePTS->count(Src)) { + // llvm::errs() << "> Store\n"; + return PointerRet; + } + + return container_type(); + }(); + + Facts.insert(Src); + + // llvm::errs() << "Gen { "; + + // llvm::interleaveComma(Facts, llvm::errs(), [](const auto *Ptr) { + // llvm::errs() << llvmIRToShortString(Ptr); + // }); + // llvm::errs() << " } at " << llvmIRToString(Inst) << '\n'; + + return Facts; + }); +} + auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) -> FlowFunctionPtrType { bool GeneratesFact = TaintGen.isSource(Curr); @@ -131,51 +187,8 @@ auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) } if (const auto *Store = llvm::dyn_cast(Curr)) { - auto PointerPTS = - PT.getReachableAllocationSites(Store->getPointerOperand(), true, Store); - container_type PointerRet(PointerPTS->begin(), PointerPTS->end()); - PointerRet.insert(Store->getPointerOperand()); - - auto ValuePTS = - PT.getReachableAllocationSites(Store->getValueOperand(), true, Store); - - // llvm::errs() << "At " << llvmIRToString(Store) << ":\n"; - // llvm::errs() << "> PointerRet:\n"; - // for (const auto *Ptr : PointerRet) { - // llvm::errs() << "> " << llvmIRToString(Ptr) << '\n'; - // } - // llvm::errs() << "> ValuePTS:\n"; - // for (const auto *Ptr : *ValuePTS) { - // llvm::errs() << "> " << llvmIRToString(Ptr) << '\n'; - // } - - return lambdaFlow([Store, PointerRet = std::move(PointerRet), - ValuePTS = std::move(ValuePTS), - GeneratesFact](d_t Src) -> container_type { - if (Store->getPointerOperand() == Src || - (PointerRet.count(Src) && - canKillPointerOp(Store->getPointerOperand(), Src, PointerRet))) { - // llvm::errs() << "Kill pointer op " << llvmIRToShortString(Src) << " - // at " - // << llvmIRToString(Store) << '\n'; - return {}; - } - container_type Facts = [&] { - // y/Y 
now obtains its new value(s) from x/X - // If a value is stored that holds we must generate all potential - // memory locations the store might write to. - // ... or from zero, if we manually generate a fact here - if (Store->getValueOperand() == Src || - (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src)) || - ValuePTS->count(Src)) { - return PointerRet; - } - return container_type(); - }(); - - Facts.insert(Src); - return Facts; - }); + return getStoreFF(GeneratesFact, PT, Store, Store->getPointerOperand(), + Store->getValueOperand()); } // Fallback @@ -283,9 +296,19 @@ auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( } const auto *Call = llvm::cast(CallSite); + + // Assume, the sret param is at index 0 + const auto *SRet = + Call->hasStructRetAttr() ? Call->getArgOperand(0) : nullptr; + auto Mapper = mapFactsAlongsideCallSite( Call, - [](d_t Arg) { + [SRet](d_t Arg) { + if (SRet == Arg) { + // perform strong update here + return false; + } + return true; // return !Arg->getType()->isPointerTy(); // return llvm::isa(Arg); @@ -299,38 +322,16 @@ auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( return Mapper; } -auto IDEFeatureTaintAnalysis::getSummaryFlowFunction(n_t CallSite, f_t DestFun) +auto IDEFeatureTaintAnalysis::getSummaryFlowFunction(n_t CallSite, + f_t /*DestFun*/) -> FlowFunctionPtrType { if (const auto *MemTrn = llvm::dyn_cast(CallSite)) { - - bool GeneratesFact = TaintGen.isSource(CallSite); - - auto PointerPTS = - PT.getReachableAllocationSites(MemTrn->getDest(), true, MemTrn); - container_type PointerRet(PointerPTS->begin(), PointerPTS->end()); - PointerRet.insert(MemTrn->getDest()); - return lambdaFlow([MemTrn, PointerRet = std::move(PointerRet), - GeneratesFact](d_t Src) -> container_type { - if (canKillPointerOp(MemTrn->getDest(), Src, PointerRet)) { - return {}; - } - container_type Facts; - Facts.insert(Src); - // y/Y now obtains its new value(s) from x/X - // If a value is stored that holds we must generate all potential - // 
memory locations the store might write to. - // ... or from zero, if we manually generate a fact here - if (MemTrn->getSource() == Src || - MemTrn->getSource()->stripInBoundsConstantOffsets() == Src || - (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src))) { - - auto Facts = PointerRet; - Facts.insert(Src); - return Facts; - } - - return {Src}; - }); + return getStoreFF(TaintGen.isSource(CallSite), PT, MemTrn, + MemTrn->getDest(), MemTrn->getSource()); + } + if (const auto *MemSet = llvm::dyn_cast(CallSite)) { + return getStoreFF(TaintGen.isSource(CallSite), PT, MemSet, + MemSet->getDest(), MemSet->getValue()); } return nullptr; diff --git a/test/llvm_test_code/inst_interaction/CMakeLists.txt b/test/llvm_test_code/inst_interaction/CMakeLists.txt index aaff9adacb..a111a7103d 100644 --- a/test/llvm_test_code/inst_interaction/CMakeLists.txt +++ b/test/llvm_test_code/inst_interaction/CMakeLists.txt @@ -29,6 +29,7 @@ set(Sources rvo_01.cpp rvo_02.cpp rvo_03.cpp + rvo_04.cpp struct_01.cpp struct_02.cpp ) diff --git a/test/llvm_test_code/inst_interaction/rvo_04.cpp b/test/llvm_test_code/inst_interaction/rvo_04.cpp new file mode 100644 index 0000000000..9aca5f2608 --- /dev/null +++ b/test/llvm_test_code/inst_interaction/rvo_04.cpp @@ -0,0 +1,19 @@ + +#include +#include +#include + +struct Foo { + int X = 0; + + Foo() noexcept = default; + Foo(const Foo &Other) noexcept : X(Other.X) {} +}; + +Foo createFoo() { return {}; } + +int main() { + Foo F; + F = createFoo(); + return F.X; +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp index c14da6c603..9a3330f796 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -106,7 +106,9 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { IIASolver.solve(); if (PrintDump) 
{ // IRDB->emitPreprocessedIR(llvm::outs()); - // IIASolver.dumpResults(); + IIASolver.dumpResults(); + llvm::outs() + << "\n======================================================\n"; printDump(HA->getProjectIRDB(), IIASolver.getSolverResults()); } // do the comparison @@ -261,7 +263,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_06) { GroundTruth.emplace("main", 19, "k", std::set{"6"}); GroundTruth.emplace("main", 19, "p", std::set{"1", "2", "9", "11"}); - doAnalysisAndCompareResults("basic_06_cpp.ll", {"main"}, GroundTruth, false); + doAnalysisAndCompareResults("basic_06_cpp.ll", {"main"}, GroundTruth, true); } TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_07) { @@ -388,7 +390,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_01) { GroundTruth.emplace("main", 9, "retval", std::set{"3"}); GroundTruth.emplace("main", 9, "i", std::set{"7"}); GroundTruth.emplace("main", 9, "j", std::set{"0", "5", "6"}); - doAnalysisAndCompareResults("global_01_cpp.ll", {"main"}, GroundTruth, true); + doAnalysisAndCompareResults("global_01_cpp.ll", {"main"}, GroundTruth, false); } TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_02) { @@ -491,11 +493,7 @@ PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_02) { ASSERT_FALSE(true) << "Add GroundTruth!"; }) -PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_03) { - // GTEST_SKIP() << "This test heavily depends on the used stdlib version. 
- // TODO: " - // "add a better one"; - +TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_03) { std::set GroundTruth; // GroundTruth.emplace("main", 16, "retval", std::set{"75", // "76"}); GroundTruth.emplace("main", 16, "str", @@ -505,7 +503,19 @@ PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_03) { doAnalysisAndCompareResults("rvo_03_cpp.ll", {"main"}, GroundTruth, true); ASSERT_FALSE(true) << "Add GroundTruth!"; -}) +} + +TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_04) { + std::set GroundTruth; + // GroundTruth.emplace("main", 16, "retval", std::set{"75", + // "76"}); GroundTruth.emplace("main", 16, "str", + // std::set{"70", "65", "72", "74", "77"}); + // GroundTruth.emplace("main", 16, "ref.tmp", + // std::set{"66", "9", "72", "73", "71"}); + doAnalysisAndCompareResults("rvo_04_cpp.ll", {"main"}, GroundTruth, true); + + ASSERT_FALSE(true) << "Add GroundTruth!"; +} } // namespace From 9fbc41793f147af27ddabaaaca2aa5ebe142c226 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 10 Sep 2024 09:27:46 +0200 Subject: [PATCH 21/33] cs taints do not hold within callees --- external/json-schema-validator | 2 +- .../Problems/IDEFeatureTaintAnalysis.cpp | 52 ++++++++++--------- .../inst_interaction/rvo_04.cpp | 1 + 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/external/json-schema-validator b/external/json-schema-validator index 491ac44026..349cba9f7e 160000 --- a/external/json-schema-validator +++ b/external/json-schema-validator @@ -1 +1 @@ -Subproject commit 491ac44026e08f31790f5cacffa62e168bb35e32 +Subproject commit 349cba9f7e3cb423bbc1811bdd9f6770f520b468 diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index 6d1cf2d4a7..bfaf461ca4 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -14,6 +14,7 @@ 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -110,11 +111,11 @@ static auto getStoreFF(bool GeneratesFact, LLVMAliasInfoRef PT, auto ValuePTS = PT.getReachableAllocationSites(Value, true, Inst); - // llvm::errs() << "At " << llvmIRToString(Inst) << ":\n"; - // llvm::errs() << "> PointerRet:\n"; - // for (const auto *Ptr : PointerRet) { - // llvm::errs() << "> " << llvmIRToString(Ptr) << '\n'; - // } + llvm::errs() << "At " << llvmIRToString(Inst) << ":\n"; + llvm::errs() << "> PointerRet:\n"; + for (const auto *Ptr : PointerRet) { + llvm::errs() << "> " << llvmIRToString(Ptr) << '\n'; + } // llvm::errs() << "> ValuePTS:\n"; // for (const auto *Ptr : *ValuePTS) { // llvm::errs() << "> " << llvmIRToString(Ptr) << '\n'; @@ -241,14 +242,14 @@ auto IDEFeatureTaintAnalysis::getCallFlowFunction(n_t CallSite, f_t DestFun) }); // Generate the artificially introduced RVO parameters from zero value. - const auto *SRetFormal = - CS->hasStructRetAttr() ? DestFun->getArg(0) : nullptr; + // const auto *SRetFormal = + // CS->hasStructRetAttr() ? 
DestFun->getArg(0) : nullptr; - if (SRetFormal && TaintGen.isSource(CallSite)) { - return unionFlows( - std::move(MapFactsToCalleeFF), - generateFlowAndKillAllOthers(SRetFormal, this->getZeroValue())); - } + // if (SRetFormal && TaintGen.isSource(CallSite)) { + // return unionFlows( + // std::move(MapFactsToCalleeFF), + // generateFlowAndKillAllOthers(SRetFormal, this->getZeroValue())); + // } return MapFactsToCalleeFF; } @@ -285,26 +286,27 @@ auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( n_t CallSite, n_t /* RetSite */, llvm::ArrayRef Callees) -> FlowFunctionPtrType { - bool GeneratesFact = false; + const auto *Call = llvm::cast(CallSite); + + // Assume, the sret param is at index 0 + const llvm::Value *RetVal = + !Call->getType()->isVoidTy() + ? CallSite + : (Call->hasStructRetAttr() ? Call->getArgOperand(0) : nullptr); + + bool GeneratesFact = RetVal && TaintGen.isSource(CallSite); + if (llvm::all_of(Callees, [](f_t Fun) { return Fun->isDeclaration(); })) { - GeneratesFact = - !CallSite->getType()->isVoidTy() && TaintGen.isSource(CallSite); if (GeneratesFact) { - return generateFromZero(CallSite); + return generateFromZero(RetVal); } return identityFlow(); } - const auto *Call = llvm::cast(CallSite); - - // Assume, the sret param is at index 0 - const auto *SRet = - Call->hasStructRetAttr() ? 
Call->getArgOperand(0) : nullptr; - auto Mapper = mapFactsAlongsideCallSite( Call, - [SRet](d_t Arg) { - if (SRet == Arg) { + [RetVal](d_t Arg) { + if (RetVal == Arg) { // perform strong update here return false; } @@ -317,7 +319,7 @@ auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( if (GeneratesFact) { return unionFlows(std::move(Mapper), - generateFlowAndKillAllOthers(CallSite, getZeroValue())); + generateFlowAndKillAllOthers(RetVal, getZeroValue())); } return Mapper; } diff --git a/test/llvm_test_code/inst_interaction/rvo_04.cpp b/test/llvm_test_code/inst_interaction/rvo_04.cpp index 9aca5f2608..cebb85a9e1 100644 --- a/test/llvm_test_code/inst_interaction/rvo_04.cpp +++ b/test/llvm_test_code/inst_interaction/rvo_04.cpp @@ -7,6 +7,7 @@ struct Foo { int X = 0; Foo() noexcept = default; + // NOLINTNEXTLINE Foo(const Foo &Other) noexcept : X(Other.X) {} }; From d8b5a3c4344f5239ae56dd751eed0cc5ee3642c7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 8 Oct 2024 14:15:52 +0200 Subject: [PATCH 22/33] Fix unionWith in IDEFeatureTaintEdgeFact --- .../IfdsIde/Problems/IDEFeatureTaintAnalysis.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h index 3e174cacaf..9ecc513e5f 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h @@ -69,9 +69,26 @@ struct IDEFeatureTaintEdgeFact { Taints.setBitsInMask((const uint32_t *)&Facts, sizeof(Facts)); } void unionWith(const IDEFeatureTaintEdgeFact &Facts) { + if (Facts.isTop()) { + return; + } Taints |= Facts.Taints; } + /// Checks whether this set contains no facts. 
+ /// Note: Top also counts as empty + [[nodiscard]] inline bool empty() const noexcept { + return isTop() || Taints.none(); + } + /// Returns the number of facts in this set. + /// Note: Top counts as empty + [[nodiscard]] inline size_t size() const noexcept { + if (isTop()) { + return 0; + } + return Taints.count(); + } + [[nodiscard]] inline bool isBottom() const noexcept { return Taints.empty(); } [[nodiscard]] inline bool isTop() const noexcept { return Taints.isInvalid(); From 5a7a12635ac5454b7863d3dd86a988d56ba1a0db Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 13 Nov 2024 20:25:04 +0100 Subject: [PATCH 23/33] some cleanup --- .clang-tidy | 2 +- external/json-schema-validator | 2 +- .../Problems/IDEFeatureTaintAnalysis.h | 32 +- include/phasar/Utils/BitVectorSet.h | 2 +- .../Problems/IDEFeatureTaintAnalysis.cpp | 328 +++--------------- 5 files changed, 71 insertions(+), 295 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 54f04105c7..4540f43a83 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -57,7 +57,7 @@ CheckOptions: - key: readability-identifier-naming.ParameterIgnoredRegexp value: (d|d1|d2|d3|d4|d5|eP|f|n) - key: readability-identifier-naming.FunctionIgnoredRegexp - value: (try_emplace|from_json|to_json|equal_to|to_string|DToString|NToString|FToString|LToString) + value: (try_emplace|from_json|to_json|equal_to|to_string|DToString|NToString|FToString|LToString|hash_value) - key: cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor value: 1 - key: cppcoreguidelines-special-member-functions.AllowMissingMoveFunctions diff --git a/external/json-schema-validator b/external/json-schema-validator index 349cba9f7e..491ac44026 160000 --- a/external/json-schema-validator +++ b/external/json-schema-validator @@ -1 +1 @@ -Subproject commit 349cba9f7e3cb423bbc1811bdd9f6770f520b468 +Subproject commit 491ac44026e08f31790f5cacffa62e168bb35e32 diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h 
b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h index 9ecc513e5f..a354a1aa28 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h @@ -41,7 +41,7 @@ class LLVMProjectIRDB; struct IDEFeatureTaintEdgeFact { llvm::SmallBitVector Taints; - static llvm::SmallBitVector fromBits(uintptr_t Bits) { + [[nodiscard]] static llvm::SmallBitVector fromBits(uintptr_t Bits) { #if __has_builtin(__builtin_constant_p) if (__builtin_constant_p(Bits) && Bits == 0) { return {}; @@ -94,7 +94,7 @@ struct IDEFeatureTaintEdgeFact { return Taints.isInvalid(); } - friend llvm::hash_code + [[nodiscard]] friend llvm::hash_code hash_value(const IDEFeatureTaintEdgeFact &BV) noexcept { if (BV.Taints.empty()) { return {}; @@ -165,7 +165,7 @@ struct IDEFeatureTaintEdgeFact { } }; -std::string LToString(const IDEFeatureTaintEdgeFact &EdgeFact); +[[nodiscard]] std::string LToString(const IDEFeatureTaintEdgeFact &EdgeFact); template <> struct JoinLatticeTraits { inline static IDEFeatureTaintEdgeFact top() { @@ -212,10 +212,7 @@ class FeatureTaintGenerator { BitVectorSet, llvm::SmallBitVector> BV( llvm::adl_begin(TaintSet), llvm::adl_end(TaintSet)); - auto Ret = IDEFeatureTaintEdgeFact{std::move(BV).getBits()}; - - // llvm::errs() << "generateTaints: " << LToString(Ret) << '\n'; - return Ret; + return IDEFeatureTaintEdgeFact{std::move(BV).getBits()}; }; } @@ -236,17 +233,6 @@ class FeatureTaintGenerator { GenerateTaints(std::move(GenerateTaints)), Printer(std::move(Printer)) { } - template >>> - FeatureTaintGenerator(EdgeFactGenerator &&EFGen) - : IsFeatureSource([EFGen{EFGen}](InstOrGlobal IG) { - return !llvm::empty(std::invoke(EFGen, IG)); - }), - GenerateTaints( - createGenerateTaints(std::forward(EFGen))), - Printer(createEdgeFactPrinter()) {} - template FeatureTaintGenerator(SourceDetector &&SrcDetector, EdgeFactGenerator &&EFGen) : 
IsFeatureSource(std::forward(SrcDetector)), @@ -254,6 +240,16 @@ class FeatureTaintGenerator { createGenerateTaints(std::forward(EFGen))), Printer(createEdgeFactPrinter()) {} + template >>> + FeatureTaintGenerator(EdgeFactGenerator &&EFGen) + : FeatureTaintGenerator( + [EFGen](InstOrGlobal IG) { + return !llvm::empty(std::invoke(EFGen, IG)); + }, + std::forward(EFGen)) {} + [[nodiscard]] bool isSource(InstOrGlobal IG) const { return IsFeatureSource(IG); } diff --git a/include/phasar/Utils/BitVectorSet.h b/include/phasar/Utils/BitVectorSet.h index ce62b3bf71..fc8c4bdc02 100644 --- a/include/phasar/Utils/BitVectorSet.h +++ b/include/phasar/Utils/BitVectorSet.h @@ -167,7 +167,7 @@ class BitVectorSet { // T getVal() {return pos_ptr->second;} - [[nodiscard]] BitVectorTy getBits() const { return Bits; } + [[nodiscard]] const BitVectorTy &getBits() const { return Bits; } private: D PosPtr; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index bfaf461ca4..652c0c885a 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -76,7 +76,7 @@ static bool isMustAlias(const llvm::Value *Val1, return true; } - // TODO: handle more cases + // XXX: handle more cases return false; } @@ -111,16 +111,6 @@ static auto getStoreFF(bool GeneratesFact, LLVMAliasInfoRef PT, auto ValuePTS = PT.getReachableAllocationSites(Value, true, Inst); - llvm::errs() << "At " << llvmIRToString(Inst) << ":\n"; - llvm::errs() << "> PointerRet:\n"; - for (const auto *Ptr : PointerRet) { - llvm::errs() << "> " << llvmIRToString(Ptr) << '\n'; - } - // llvm::errs() << "> ValuePTS:\n"; - // for (const auto *Ptr : *ValuePTS) { - // llvm::errs() << "> " << llvmIRToString(Ptr) << '\n'; - // } - return FlowFunctionTemplates::lambdaFlow( [Dest, Value, PointerRet = std::move(PointerRet), ValuePTS = 
std::move(ValuePTS), @@ -137,7 +127,6 @@ static auto getStoreFF(bool GeneratesFact, LLVMAliasInfoRef PT, if (Value == Src || (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src)) || ValuePTS->count(Src)) { - // llvm::errs() << "> Store\n"; return PointerRet; } @@ -145,14 +134,6 @@ static auto getStoreFF(bool GeneratesFact, LLVMAliasInfoRef PT, }(); Facts.insert(Src); - - // llvm::errs() << "Gen { "; - - // llvm::interleaveComma(Facts, llvm::errs(), [](const auto *Ptr) { - // llvm::errs() << llvmIRToShortString(Ptr); - // }); - // llvm::errs() << " } at " << llvmIRToString(Inst) << '\n'; - return Facts; }); } @@ -161,9 +142,6 @@ auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) -> FlowFunctionPtrType { bool GeneratesFact = TaintGen.isSource(Curr); - // llvm::errs() << "[getNormalFlowFunction]: " << llvmIRToString(Curr) << - // '\n'; - if (const auto *Alloca = llvm::dyn_cast(Curr)) { if (GeneratesFact) { return generateFromZero(Alloca); @@ -241,16 +219,6 @@ auto IDEFeatureTaintAnalysis::getCallFlowFunction(n_t CallSite, f_t DestFun) return true; }); - // Generate the artificially introduced RVO parameters from zero value. - // const auto *SRetFormal = - // CS->hasStructRetAttr() ? 
DestFun->getArg(0) : nullptr; - - // if (SRetFormal && TaintGen.isSource(CallSite)) { - // return unionFlows( - // std::move(MapFactsToCalleeFF), - // generateFlowAndKillAllOthers(SRetFormal, this->getZeroValue())); - // } - return MapFactsToCalleeFF; } @@ -312,8 +280,6 @@ auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( } return true; - // return !Arg->getType()->isPointerTy(); - // return llvm::isa(Arg); }, /*PropagateGlobals*/ false); @@ -349,13 +315,14 @@ struct IDEFeatureTaintAnalysis::AddFactsEF { return Source; } - static EdgeFunction compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction) { + static EdgeFunction + compose(EdgeFunctionRef /*This*/, + const EdgeFunction & /*SecondFunction*/) { llvm::report_fatal_error("Implemented in 'extend'"); } - static EdgeFunction join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) { + static EdgeFunction join(EdgeFunctionRef /*This*/, + const EdgeFunction & /*OtherFunction*/) { llvm::report_fatal_error("Implemented in 'combine'"); } @@ -363,7 +330,6 @@ struct IDEFeatureTaintAnalysis::AddFactsEF { return L.Facts == R.Facts; } - // NOLINTNEXTLINE(readability-identifier-naming) -- needed for ADL friend llvm::hash_code hash_value(const AddFactsEF &EF) { return hash_value(EF.Facts); } @@ -380,13 +346,14 @@ struct IDEFeatureTaintAnalysis::GenerateEF { return Facts; } - static EdgeFunction compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction) { + static EdgeFunction + compose(EdgeFunctionRef /*This*/, + const EdgeFunction & /*SecondFunction*/) { llvm::report_fatal_error("Implemented in 'extend'"); } - static EdgeFunction join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) { + static EdgeFunction join(EdgeFunctionRef /*This*/, + const EdgeFunction & /*OtherFunction*/) { llvm::report_fatal_error("Implemented in 'combine'"); } @@ -394,7 +361,6 @@ struct IDEFeatureTaintAnalysis::GenerateEF { return L.Facts == R.Facts; } - // NOLINTNEXTLINE(readability-identifier-naming) -- 
needed for ADL constexpr friend llvm::hash_code hash_value(const GenerateEF &EF) { return hash_value(EF.Facts); } @@ -411,21 +377,23 @@ struct AddSmallFactsEF { return Source; } - static EdgeFunction compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction) { + static EdgeFunction + compose(EdgeFunctionRef /*This*/, + const EdgeFunction & /*SecondFunction*/) { llvm::report_fatal_error("Implemented in 'extend'"); } - static EdgeFunction join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) { + static EdgeFunction join(EdgeFunctionRef /*This*/, + const EdgeFunction & /*OtherFunction*/) { llvm::report_fatal_error("Implemented in 'combine'"); } + // NOLINTNEXTLINE -- unused function -- must satisfy the EF interface friend bool operator==(const AddSmallFactsEF &L, const AddSmallFactsEF &R) { return L.Facts == R.Facts; } - // NOLINTNEXTLINE(readability-identifier-naming) -- needed for ADL + // NOLINTNEXTLINE -- unused function -- must satisfy the EF interface friend llvm::hash_code hash_value(const AddSmallFactsEF &EF) { return llvm::hash_value(EF.Facts); } @@ -442,179 +410,34 @@ struct GenerateSmallEF { return Facts; } - static EdgeFunction compose(EdgeFunctionRef This, - const EdgeFunction &SecondFunction) { + static EdgeFunction + compose(EdgeFunctionRef /*This*/, + const EdgeFunction & /*SecondFunction*/) { llvm::report_fatal_error("Implemented in 'extend'"); } - static EdgeFunction join(EdgeFunctionRef This, - const EdgeFunction &OtherFunction) { + static EdgeFunction join(EdgeFunctionRef /*This*/, + const EdgeFunction & /*OtherFunction*/) { llvm::report_fatal_error("Implemented in 'combine'"); } - // NOLINTNEXTLINE(readability-identifier-naming) -- needed for ADL + // NOLINTNEXTLINE -- unused function -- must satisfy the EF interface friend llvm::hash_code hash_value(const GenerateSmallEF &EF) { return llvm::hash_value(EF.Facts); } + // NOLINTNEXTLINE -- unused function -- must satisfy the EF interface friend bool operator==(GenerateSmallEF L, 
GenerateSmallEF R) { return L.Facts == R.Facts; } + // NOLINTNEXTLINE -- unused function -- used in EdgeFunction friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, GenerateSmallEF EF) { return OS << "GenerateSmallEF" << LToString(EF.computeTarget(0)); } }; -// auto GenerateSmallEF::compose(EdgeFunctionRef This, -// const EdgeFunction &SecondFunction) -// -> EdgeFunction { -// if (auto Default = defaultComposeOrNull(This, SecondFunction)) { -// return Default; -// } - -// auto Val = SecondFunction.computeTarget(This->computeTarget(0)); - -// if (Val.Taints.isSmall()) { -// uintptr_t Buf{}; -// std::ignore = Val.Taints.getData(Buf); -// return GenerateSmallEF{Buf}; -// } - -// // TODO: Caching - -// return GenerateEF{std::move(Val)}; -// } - -// auto AddSmallFactsEF::compose(EdgeFunctionRef This, -// const EdgeFunction &SecondFunction) -// -> EdgeFunction { -// if (auto Default = defaultComposeOrNull(This, SecondFunction)) { -// return Default; -// } - -// auto Val = SecondFunction.computeTarget(This->computeTarget(0)); - -// if (Val.Taints.isSmall()) { -// uintptr_t Buf{}; -// std::ignore = Val.Taints.getData(Buf); -// return AddSmallFactsEF{Buf}; -// } - -// // TODO: Caching - -// return AddFactsEF{std::move(Val)}; -// } - -// auto GenerateEF::compose(EdgeFunctionRef This, -// const EdgeFunction &SecondFunction) -// -> EdgeFunction { -// if (auto Default = defaultComposeOrNull(This, SecondFunction)) { -// return Default; -// } - -// auto Val = SecondFunction.computeTarget(This->computeTarget(0)); - -// // TODO: Caching - -// return GenerateEF{std::move(Val)}; -// } - -// auto AddFactsEF::compose(EdgeFunctionRef This, -// const EdgeFunction &SecondFunction) -// -> EdgeFunction { -// if (auto Default = defaultComposeOrNull(This, SecondFunction)) { -// return Default; -// } - -// auto Val = SecondFunction.computeTarget(This->computeTarget(0)); - -// // TODO: Caching - -// return AddFactsEF{std::move(Val)}; -// } - -// template -// EdgeFunction 
joinWithGen(EdgeFunctionRef This, -// const EdgeFunction &OtherFunction) { -// if (auto Default = defaultJoinOrNull(This, OtherFunction)) { -// return Default; -// } - -// auto OtherFacts = OtherFunction.computeTarget(0); -// OtherFacts.unionWith(This->Facts); - -// if (OtherFacts.Taints.isSmall()) { -// uintptr_t Buf{}; -// std::ignore = OtherFacts.Taints.getData(Buf); - -// if (OtherFunction.isConstant()) { -// return GenerateSmallEF{Buf}; -// } - -// return AddSmallFactsEF{Buf}; -// } - -// // TODO: Caching - -// if (OtherFunction.isConstant()) { -// return GenerateEF{std::move(OtherFacts)}; -// } - -// return AddFactsEF{std::move(OtherFacts)}; -// } - -// template -// EdgeFunction joinWithAdd(EdgeFunctionRef This, -// const EdgeFunction &OtherFunction) { -// /// XXX: Here, we underapproximate joins with EdgeIdentity -// if (llvm::isa>(OtherFunction)) { -// return This; -// } - -// if (auto Default = defaultJoinOrNull(This, OtherFunction)) { -// return Default; -// } - -// auto OtherFacts = OtherFunction.computeTarget(0); -// OtherFacts.unionWith(This->Facts); - -// if (OtherFacts.Taints.isSmall()) { -// uintptr_t Buf{}; -// std::ignore = OtherFacts.Taints.getData(Buf); - -// return AddSmallFactsEF{Buf}; -// } - -// // TODO: Caching - -// return AddFactsEF{std::move(OtherFacts)}; -// } - -// auto GenerateSmallEF::join(EdgeFunctionRef This, -// const EdgeFunction &OtherFunction) -// -> EdgeFunction { -// return joinWithGen(This, OtherFunction); -// } -// auto GenerateEF::join(EdgeFunctionRef This, -// const EdgeFunction &OtherFunction) -// -> EdgeFunction { -// return joinWithGen(This, OtherFunction); -// } - -// auto AddSmallFactsEF::join(EdgeFunctionRef This, -// const EdgeFunction &OtherFunction) -// -> EdgeFunction { -// return joinWithAdd(This, OtherFunction); -// } - -// auto AddFactsEF::join(EdgeFunctionRef This, -// const EdgeFunction &OtherFunction) -// -> EdgeFunction { -// return joinWithAdd(This, OtherFunction); -// } - /// template @@ -708,59 +531,44 
@@ EdgeFunction iiaDefaultJoinOrNull(const EdgeFunction &This, EdgeFunction IDEFeatureTaintAnalysis::extend(const EdgeFunction &FirstEF, const EdgeFunction &SecondEF) { - auto Ret = [&] { - if (auto Default = defaultComposeOrNull(FirstEF, SecondEF)) { - // llvm::errs() << "defaultComposeOrNull>>\n"; - return Default; - } - auto Val = SecondEF.computeTarget(FirstEF.computeTarget(0)); - - if (FirstEF.isConstant()) { - return genEF(std::move(Val), GenEFCache); - } + if (auto Default = defaultComposeOrNull(FirstEF, SecondEF)) { + return Default; + } - return addEF(std::move(Val), AddEFCache); - }(); + auto Val = SecondEF.computeTarget(FirstEF.computeTarget(0)); - // llvm::errs() << "Extend " << FirstEF << " with " << SecondEF << " --> " << - // Ret - // << '\n'; + if (FirstEF.isConstant()) { + return genEF(std::move(Val), GenEFCache); + } - return Ret; + return addEF(std::move(Val), AddEFCache); } + EdgeFunction IDEFeatureTaintAnalysis::combine(const EdgeFunction &FirstEF, const EdgeFunction &OtherEF) { - auto Ret = [&] { - /// XXX: Here, we underapproximate joins with EdgeIdentity - if (llvm::isa>(FirstEF)) { - return OtherEF; - } - if (llvm::isa>(OtherEF) && - !llvm::isa>(FirstEF)) { - return FirstEF; - } - if (auto Default = iiaDefaultJoinOrNull(FirstEF, OtherEF)) { - return Default; - } + /// XXX: Here, we underapproximate joins with EdgeIdentity + if (llvm::isa>(FirstEF)) { + return OtherEF; + } + if (llvm::isa>(OtherEF) && + !llvm::isa>(FirstEF)) { + return FirstEF; + } - // auto ThisFacts = FirstEF.computeTarget(0); - // ThisFacts.unionWith(OtherEF.computeTarget(0)); - auto ThisFacts = unionTaints(FirstEF, OtherEF); + if (auto Default = iiaDefaultJoinOrNull(FirstEF, OtherEF)) { + return Default; + } - if (FirstEF.isConstant() && OtherEF.isConstant()) { - return genEF(std::move(ThisFacts), GenEFCache); - } + auto ThisFacts = unionTaints(FirstEF, OtherEF); - return addEF(std::move(ThisFacts), AddEFCache); - }(); + if (FirstEF.isConstant() && OtherEF.isConstant()) 
{ + return genEF(std::move(ThisFacts), GenEFCache); + } - // llvm::errs() << "Combine " << FirstEF << " and " << OtherEF << " --> " << - // Ret - // << '\n'; - return Ret; + return addEF(std::move(ThisFacts), AddEFCache); } auto IDEFeatureTaintAnalysis::getNormalEdgeFunction(n_t Curr, d_t CurrNode, @@ -770,31 +578,14 @@ auto IDEFeatureTaintAnalysis::getNormalEdgeFunction(n_t Curr, d_t CurrNode, if (isZeroValue(SuccNode) || CurrNode == SuccNode) { // We don't want to propagate any facts on zero - - // llvm::errs() << "Identity Edge\n"; return EdgeIdentity{}; } if (isZeroValue(CurrNode)) { - // llvm::errs() << "Generate from Zero\n"; - // Generate user edge-facts from zero return genEF(TaintGen.getGeneratedTaintsAt(Curr), GenEFCache); } - // Overrides at store instructions - // if (const auto *Store = llvm::dyn_cast(Curr)) { - // if (CurrNode == Store->getValueOperand()) { - // // Store tainted value - - // // propagate facts unchanged. User edge-facts are generated from zero. - - // // llvm::errs() << "Store Identity\n"; - // return EdgeIdentity{}; - // } - // } - - // llvm::errs() << "Fallback Identity\n"; // Otherwise stick to identity. 
return EdgeIdentity{}; } @@ -812,7 +603,7 @@ auto IDEFeatureTaintAnalysis::getCallEdgeFunction(n_t CallSite, d_t SrcNode, } auto IDEFeatureTaintAnalysis::getReturnEdgeFunction( - n_t CallSite, f_t /*CalleeFunction*/, n_t ExitStmt, d_t ExitNode, + n_t /*CallSite*/, f_t /*CalleeFunction*/, n_t ExitStmt, d_t ExitNode, n_t /*RetSite*/, d_t RetNode) -> EdgeFunction { if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { // Generate user edge-facts from zero @@ -827,9 +618,6 @@ auto IDEFeatureTaintAnalysis::getCallToRetEdgeFunction( llvm::ArrayRef /*Callees*/) -> EdgeFunction { if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { // Generate user edge-facts from zero - - // llvm::errs() << "At CTR " << llvmIRToString(CallSite) - // << ": Gen from zero!\n"; return genEF(TaintGen.getGeneratedTaintsAt(CallSite), GenEFCache); } @@ -856,11 +644,10 @@ auto IDEFeatureTaintAnalysis::getCallToRetEdgeFunction( } auto IDEFeatureTaintAnalysis::getSummaryEdgeFunction(n_t Curr, d_t CurrNode, - n_t Succ, d_t SuccNode) + n_t /*Succ*/, d_t SuccNode) -> EdgeFunction { if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { // Generate user edge-facts from zero - return genEF(TaintGen.getGeneratedTaintsAt(Curr), GenEFCache); } @@ -881,12 +668,7 @@ auto IDEFeatureTaintAnalysis::initialSeeds() -> InitialSeeds { // parameters will otherwise cause trouble by overriding alloca // instructions without being valid data-flow facts themselves. - /// TODO: Do we want that? 
--NO - // for (const auto &Arg : SP->getFunction()->args()) { - // Seeds.addSeed(SP, &Arg, BitVectorSet()); - // } // Generate all global variables using generalized initial seeds - for (const auto &G : this->IRDB->getModule()->globals()) { if (const auto *GV = llvm::dyn_cast(&G)) { l_t InitialValues = TaintGen.getGeneratedTaintsAt(GV); @@ -897,8 +679,6 @@ auto IDEFeatureTaintAnalysis::initialSeeds() -> InitialSeeds { } }); - // Seeds.dump(llvm::errs()); - return Seeds; } @@ -918,7 +698,7 @@ void IDEFeatureTaintAnalysis::emitTextReport( for (const auto &Inst : llvm::instructions(F)) { auto Results = SR.resultsAt(&Inst, true); - // stripBottomResults(Results); + if (!Results.empty()) { OS << "At IR statement: " << NToString(Inst) << '\n'; for (const auto &Result : Results) { From 926ec4ff42bfe10ed1a6fc2e2ba86cc561d59f57 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Wed, 13 Nov 2024 20:35:41 +0100 Subject: [PATCH 24/33] fix bugs introduced by merge --- .../DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h | 2 +- include/phasar/Utils/TypeTraits.h | 2 +- .../DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp | 2 +- tools/phasar-cli/Controller/AnalysisControllerInternal.h | 1 + tools/phasar-cli/Controller/AnalysisControllerXIDEFIIA.cpp | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h index a354a1aa28..c8edb4f3b2 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h @@ -353,7 +353,7 @@ class IDEFeatureTaintAnalysis bool isZeroValue(d_t FlowFact) const noexcept override; - void emitTextReport(const SolverResults &SR, + void emitTextReport(GenericSolverResults SR, llvm::raw_ostream &OS = llvm::outs()) override; EdgeFunction extend(const EdgeFunction &FirstEF, diff 
--git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index 0f687504f6..58d9045c81 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -13,7 +13,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/raw_ostream.h" -#include "nlohmann/json_fwd.hpp" +#include "nlohmann/json.hpp" #include #include diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp index 652c0c885a..1941ba9ef6 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -687,7 +687,7 @@ bool IDEFeatureTaintAnalysis::isZeroValue(d_t FlowFact) const noexcept { } void IDEFeatureTaintAnalysis::emitTextReport( - const SolverResults &SR, llvm::raw_ostream &OS) { + GenericSolverResults SR, llvm::raw_ostream &OS) { OS << "\n====================== IDE-Inst-Interaction-Analysis Report " "======================\n"; diff --git a/tools/phasar-cli/Controller/AnalysisControllerInternal.h b/tools/phasar-cli/Controller/AnalysisControllerInternal.h index 1246518fb6..cf0075277c 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerInternal.h +++ b/tools/phasar-cli/Controller/AnalysisControllerInternal.h @@ -42,6 +42,7 @@ LLVM_LIBRARY_VISIBILITY void executeIDECSTDIOTS(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIDELinearConst(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIDESolverTest(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIDEIIA(AnalysisController &Data); +LLVM_LIBRARY_VISIBILITY void executeIDEFIIA(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIntraMonoFullConstant(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void diff --git a/tools/phasar-cli/Controller/AnalysisControllerXIDEFIIA.cpp b/tools/phasar-cli/Controller/AnalysisControllerXIDEFIIA.cpp index 
728083d7c0..6bc04e31aa 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerXIDEFIIA.cpp +++ b/tools/phasar-cli/Controller/AnalysisControllerXIDEFIIA.cpp @@ -17,7 +17,7 @@ using namespace psr; -void controller::executeIDEFIIA(AnalysisController::ControllerData &Data) { +void controller::executeIDEFIIA(AnalysisController &Data) { // use Phasar's instruction ids as testing labels auto Generator = [](std::variant From bfc76ed1909e80971566b6fabf60102c90df838a Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 29 Nov 2024 08:59:01 +0100 Subject: [PATCH 25/33] Start adding global_5 test --- external/json-schema-validator | 2 +- .../llvm_test_code/inst_interaction/CMakeLists.txt | 1 + test/llvm_test_code/inst_interaction/global_05.cpp | 12 ++++++++++++ .../Problems/IDEFeatureTaintAnalysisTest.cpp | 14 ++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 test/llvm_test_code/inst_interaction/global_05.cpp diff --git a/external/json-schema-validator b/external/json-schema-validator index 491ac44026..349cba9f7e 160000 --- a/external/json-schema-validator +++ b/external/json-schema-validator @@ -1 +1 @@ -Subproject commit 491ac44026e08f31790f5cacffa62e168bb35e32 +Subproject commit 349cba9f7e3cb423bbc1811bdd9f6770f520b468 diff --git a/test/llvm_test_code/inst_interaction/CMakeLists.txt b/test/llvm_test_code/inst_interaction/CMakeLists.txt index a111a7103d..6fdfa88e8b 100644 --- a/test/llvm_test_code/inst_interaction/CMakeLists.txt +++ b/test/llvm_test_code/inst_interaction/CMakeLists.txt @@ -22,6 +22,7 @@ set(Sources call_07.cpp global_01.cpp global_02.cpp + global_05.cpp heap_01.cpp KillTest_01.cpp KillTest_02.cpp diff --git a/test/llvm_test_code/inst_interaction/global_05.cpp b/test/llvm_test_code/inst_interaction/global_05.cpp new file mode 100644 index 0000000000..e71fb9069e --- /dev/null +++ b/test/llvm_test_code/inst_interaction/global_05.cpp @@ -0,0 +1,12 @@ +#include + +int g = 0; + +void init() { g = 1; } + +int foo() { return 
g; } + +int main() { + init(); + std::cout << foo(); +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp index 9a3330f796..2736eb6ca3 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -424,6 +424,20 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_04) { doAnalysisAndCompareResults("global_04_cpp.ll", {"main", "_Z7doStuffi"}, GroundTruth, false); } +TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_05) { + std::set GroundTruth; + // GroundTruth.emplace("main", 1, "GlobalFeature", + // std::set{"0"}); GroundTruth.emplace("main", 2, + // "GlobalFeature", std::set{"0"}); GroundTruth.emplace("main", + // 17, "GlobalFeature", std::set{"0"}); + // GroundTruth.emplace("_Z7doStuffi", 1, "GlobalFeature", + // std::set{"0"}); + // GroundTruth.emplace("_Z7doStuffi", 2, "GlobalFeature", + // std::set{"0"}); + doAnalysisAndCompareResults("global_05_cpp.ll", {"main"}, GroundTruth, true); + ASSERT_FALSE(true) << "TODO: Add GroundTruth! An init() soll nichts stehen, " + "außer init() selbst. 
g soll strongly updated sein"; +} TEST_F(IDEInstInteractionAnalysisTest, KillTest_01) { std::set GroundTruth; From 0207c1fdae223469e288ba6993f7c1f67c9d9f53 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 17 Dec 2024 20:54:12 +0100 Subject: [PATCH 26/33] Add print on error to debug the ci --- .../Problems/IDEFeatureTaintAnalysisTest.cpp | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp index 2736eb6ca3..b3675ef952 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -104,13 +104,13 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { IDESolver IIASolver(IIAProblem, &HA->getICFG()); IIASolver.solve(); - if (PrintDump) { - // IRDB->emitPreprocessedIR(llvm::outs()); - IIASolver.dumpResults(); - llvm::outs() - << "\n======================================================\n"; - printDump(HA->getProjectIRDB(), IIASolver.getSolverResults()); - } + // if (PrintDump) { + // // IRDB->emitPreprocessedIR(llvm::outs()); + // IIASolver.dumpResults(); + // llvm::errs() + // << "\n======================================================\n"; + // printDump(HA->getProjectIRDB(), IIASolver.getSolverResults()); + // } // do the comparison for (const auto &[FunName, SrcLine, VarName, LatticeVal] : GroundTruth) { const auto *Fun = IRDB->getFunctionDefinition(FunName); @@ -137,6 +137,13 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { EXPECT_TRUE(FactFound) << "Variable '" << VarName << "' missing at '" << llvmIRToString(IRLine) << "'."; } + + if (HasFailure()) { + IIASolver.dumpResults(); + llvm::errs() + << "\n======================================================\n"; + printDump(HA->getProjectIRDB(), 
IIASolver.getSolverResults()); + } } void TearDown() override {} @@ -189,11 +196,11 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { for (const auto *Inst : IRDB.getAllInstructions()) { if (CurrFun != Inst->getFunction()) { CurrFun = Inst->getFunction(); - llvm::outs() << "\n=================== '" << CurrFun->getName() + llvm::errs() << "\n=================== '" << CurrFun->getName() << "' ===================\n"; } - llvm::outs() << " N: " << llvmIRToString(Inst) << '\n'; - llvm::outs() << " D: " << printSet(taintsForInst(Inst, SR)) << "\n\n"; + llvm::errs() << " N: " << llvmIRToString(Inst) << '\n'; + llvm::errs() << " D: " << printSet(taintsForInst(Inst, SR)) << "\n\n"; } } From 402eec17da490a8d56020976e8d3deff58bf2d79 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 5 Jan 2025 10:48:24 +0100 Subject: [PATCH 27/33] Start fixing CI for FIIA --- .../IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp index b3675ef952..501d03cb0f 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -139,7 +139,7 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { } if (HasFailure()) { - IIASolver.dumpResults(); + IIASolver.dumpResults(llvm::errs()); llvm::errs() << "\n======================================================\n"; printDump(HA->getProjectIRDB(), IIASolver.getSolverResults()); @@ -322,7 +322,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_01) { std::set{"12", "9", "10", "11"}); GroundTruth.emplace( "main", 14, "k", - std::set{"15", "1", "2", "13", "12", "9", "10", "11"}); + std::set{"15", "1", "2", "13", "14", "12", "9", "10", "11"}); 
doAnalysisAndCompareResults("call_01_cpp.ll", {"main"}, GroundTruth, false); } @@ -333,7 +333,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_02) { GroundTruth.emplace("main", 13, "j", std::set{"14"}); GroundTruth.emplace("main", 13, "k", std::set{"4", "5", "15", "6", "3", "14", "2", - "13", "16", "18"}); + "13", "16", "17", "18"}); doAnalysisAndCompareResults("call_02_cpp.ll", {"main"}, GroundTruth, false); } From 46338baf534117db0bc8495ad83db5e6fb3b6631 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 5 Jan 2025 12:08:40 +0100 Subject: [PATCH 28/33] Add missing ground-truth for FIIA RVO and Global --- .../inst_interaction/rvo_03.cpp | 4 +- .../Problems/IDEFeatureTaintAnalysisTest.cpp | 114 ++++++++++-------- 2 files changed, 63 insertions(+), 55 deletions(-) diff --git a/test/llvm_test_code/inst_interaction/rvo_03.cpp b/test/llvm_test_code/inst_interaction/rvo_03.cpp index d9390f0755..f959b899d1 100644 --- a/test/llvm_test_code/inst_interaction/rvo_03.cpp +++ b/test/llvm_test_code/inst_interaction/rvo_03.cpp @@ -7,7 +7,7 @@ class String { public: String() noexcept = default; - String(const char *Data) : Length(strlen(Data)) { + String(const char *Data) noexcept : Length(strlen(Data)) { auto *Dat = new char[Length]; this->Data = Dat; memcpy(Dat, Data, Length); @@ -43,7 +43,7 @@ class String { int g = 0; void functionWithoutInput() { g = 42; } -String createString() { return "My String"; } +String createString() noexcept { return "My String"; } int main() { String Str; diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp index 501d03cb0f..0d1a71ba2f 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -138,7 +138,7 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { << 
llvmIRToString(IRLine) << "'."; } - if (HasFailure()) { + if (PrintDump || HasFailure()) { IIASolver.dumpResults(llvm::errs()); llvm::errs() << "\n======================================================\n"; @@ -342,8 +342,9 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_03) { GroundTruth.emplace("main", 10, "retval", std::set{"20"}); GroundTruth.emplace("main", 10, "i", std::set{"21"}); GroundTruth.emplace("main", 10, "j", - std::set{"22", "15", "6", "21", "2", "13", - "8", "9", "12", "10", "24"}); + std::set{"22", "23", "15", "6", "21", "2", + "13", "8", "9", "11", "12", "10", + "24"}); doAnalysisAndCompareResults("call_03_cpp.ll", {"main"}, GroundTruth, false); } @@ -353,12 +354,14 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_04) { GroundTruth.emplace("main", 20, "i", std::set{"34"}); GroundTruth.emplace("main", 20, "j", std::set{"15", "6", "2", "13", "8", "9", - "12", "10", "35", "34", "37"}); + "11", "12", "10", "35", "36", "34", + "37"}); GroundTruth.emplace("main", 20, "k", - std::set{ - "41", "19", "15", "6", "44", "2", "13", "8", "45", - "18", "9", "12", "10", "46", "24", "25", "35", "27", - "23", "26", "38", "34", "37", "42", "40"}); + std::set{"41", "19", "15", "6", "44", "2", + "13", "8", "45", "18", "9", "11", + "12", "10", "46", "24", "25", "35", + "36", "27", "23", "26", "38", "34", + "37", "42", "43", "39", "40"}); doAnalysisAndCompareResults("call_04_cpp.ll", {"main"}, GroundTruth, false); } @@ -374,14 +377,18 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_06) { // NOTE: Here we are suffering from IntraProceduralAliasesOnly std::set GroundTruth; GroundTruth.emplace("main", 24, "retval", std::set{"11"}); - GroundTruth.emplace("main", 24, "i", - std::set{"3", "1", "2", "16", "18", "12"}); - GroundTruth.emplace("main", 24, "j", - std::set{"19", "21", "3", "1", "2", "13"}); - GroundTruth.emplace("main", 24, "k", - std::set{"22", "3", "14", "1", "2", "24"}); - GroundTruth.emplace("main", 24, "l", - std::set{"15", "3", "1", 
"2", "25", "27"}); + GroundTruth.emplace( + "main", 24, "i", + std::set{"3", "1", "2", "16", "17", "18", "12"}); + GroundTruth.emplace( + "main", 24, "j", + std::set{"19", "20", "21", "3", "1", "2", "13"}); + GroundTruth.emplace( + "main", 24, "k", + std::set{"22", "23", "3", "14", "1", "2", "24"}); + GroundTruth.emplace( + "main", 24, "l", + std::set{"15", "3", "1", "2", "25", "26", "27"}); doAnalysisAndCompareResults("call_06_cpp.ll", {"main"}, GroundTruth, false); } @@ -433,17 +440,16 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_04) { } TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_05) { std::set GroundTruth; - // GroundTruth.emplace("main", 1, "GlobalFeature", - // std::set{"0"}); GroundTruth.emplace("main", 2, - // "GlobalFeature", std::set{"0"}); GroundTruth.emplace("main", - // 17, "GlobalFeature", std::set{"0"}); - // GroundTruth.emplace("_Z7doStuffi", 1, "GlobalFeature", - // std::set{"0"}); - // GroundTruth.emplace("_Z7doStuffi", 2, "GlobalFeature", - // std::set{"0"}); + + // NOTE: Facts at init() should be empty, except for its own ID; + // g should be strongly updated + + GroundTruth.emplace("main", 1, "g", std::set{"0"}); + GroundTruth.emplace("main", 2, "g", std::set{"2"}); + GroundTruth.emplace("main", 4, "call", std::set{"2", "4", "7"}); + GroundTruth.emplace("main", 4, "g", std::set{"2"}); + doAnalysisAndCompareResults("global_05_cpp.ll", {"main"}, GroundTruth, true); - ASSERT_FALSE(true) << "TODO: Add GroundTruth! An init() soll nichts stehen, " - "außer init() selbst. 
g soll strongly updated sein"; } TEST_F(IDEInstInteractionAnalysisTest, KillTest_01) { @@ -470,9 +476,10 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleReturnTest_01) { std::set GroundTruth; GroundTruth.emplace("main", 6, "retval", std::set{"3"}); GroundTruth.emplace("main", 6, "localVar", std::set{"4"}); - GroundTruth.emplace("main", 6, "call", std::set{"0"}); - GroundTruth.emplace("main", 8, "localVar", std::set{"0", "6"}); - GroundTruth.emplace("main", 8, "call", std::set{"0"}); + GroundTruth.emplace("main", 6, "call", std::set{"0", "5"}); + GroundTruth.emplace("main", 8, "localVar", + std::set{"0", "5", "6"}); + GroundTruth.emplace("main", 8, "call", std::set{"0", "5"}); doAnalysisAndCompareResults("return_01_cpp.ll", {"main"}, GroundTruth, false); } @@ -499,43 +506,44 @@ PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_01) { }) PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_02) { - // GTEST_SKIP() << "This test heavily depends on the used stdlib version. - // TODO: " - // "add a better one"; + GTEST_SKIP() << "This test heavily depends on the used stdlib version. 
TODO: " + "add a better one"; std::set GroundTruth; - // GroundTruth.emplace("main", 16, "retval", std::set{"75", - // "76"}); GroundTruth.emplace("main", 16, "str", - // std::set{"70", "65", "72", "74", "77"}); - // GroundTruth.emplace("main", 16, "ref.tmp", - // std::set{"66", "9", "72", "73", "71"}); + GroundTruth.emplace("main", 18, "retval", std::set{"75", "76"}); + GroundTruth.emplace("main", 18, "str", + std::set{"70", "65", "72", "74", "77"}); + GroundTruth.emplace("main", 18, "ref.tmp", + std::set{"66", "9", "72", "73", "71"}); doAnalysisAndCompareResults("rvo_02_cpp.ll", {"main"}, GroundTruth, true); - - ASSERT_FALSE(true) << "Add GroundTruth!"; }) TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_03) { std::set GroundTruth; - // GroundTruth.emplace("main", 16, "retval", std::set{"75", - // "76"}); GroundTruth.emplace("main", 16, "str", - // std::set{"70", "65", "72", "74", "77"}); - // GroundTruth.emplace("main", 16, "ref.tmp", - // std::set{"66", "9", "72", "73", "71"}); - doAnalysisAndCompareResults("rvo_03_cpp.ll", {"main"}, GroundTruth, true); - ASSERT_FALSE(true) << "Add GroundTruth!"; + GroundTruth.emplace( + "main", 19, "Str", + std::set{"40", "44", "47", "50", "52", "55", "64"}); + GroundTruth.emplace("main", 19, "ref.tmp", + std::set{"14", "2", "20", "21", "24", "25", + "26", "28", "30", "33", "41", "46", + "47", "48"}); + GroundTruth.emplace("main", 19, "ref.tmp1", + std::set{"1", "14", "20", "21", "24", "25", + "26", "28", "30", "33", "49", "50", + "51"}); + doAnalysisAndCompareResults("rvo_03_cpp.ll", {"main"}, GroundTruth, true); } TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_04) { std::set GroundTruth; - // GroundTruth.emplace("main", 16, "retval", std::set{"75", - // "76"}); GroundTruth.emplace("main", 16, "str", - // std::set{"70", "65", "72", "74", "77"}); - // GroundTruth.emplace("main", 16, "ref.tmp", - // std::set{"66", "9", "72", "73", "71"}); + GroundTruth.emplace("main", 12, "retval", std::set{"16"}); + 
GroundTruth.emplace( + "main", 12, "F", + std::set{"11", "14", "17", "18", "21", "4", "5"}); + GroundTruth.emplace("main", 12, "ref.tmp", + std::set{"11", "18", "4", "5"}); doAnalysisAndCompareResults("rvo_04_cpp.ll", {"main"}, GroundTruth, true); - - ASSERT_FALSE(true) << "Add GroundTruth!"; } } // namespace From ca67f0c89a2a5efaea0d65b91039373a0872b3a9 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 5 Jan 2025 12:24:48 +0100 Subject: [PATCH 29/33] CI Fix for FIIA GlobalTest_05 --- .../IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp index 0d1a71ba2f..df7ca581a1 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -444,10 +444,11 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_05) { // NOTE: Facts at init() should be empty, except for its own ID; // g should be strongly updated - GroundTruth.emplace("main", 1, "g", std::set{"0"}); - GroundTruth.emplace("main", 2, "g", std::set{"2"}); - GroundTruth.emplace("main", 4, "call", std::set{"2", "4", "7"}); - GroundTruth.emplace("main", 4, "g", std::set{"2"}); + GroundTruth.emplace("main", 1, "g", std::set{"2"}); + GroundTruth.emplace("main", 2, "g", std::set{"8"}); + GroundTruth.emplace("main", 4, "call", + std::set{"10", "13", "14", "8"}); + GroundTruth.emplace("main", 4, "g", std::set{"8"}); doAnalysisAndCompareResults("global_05_cpp.ll", {"main"}, GroundTruth, true); } From 381e9e7be22c76de5c28d61053b95ff1ab2923d6 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 5 Jan 2025 14:05:19 +0100 Subject: [PATCH 30/33] Fix buffer overflow --- .../DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h index c8edb4f3b2..9cca9dc15e 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h @@ -66,7 +66,8 @@ struct IDEFeatureTaintEdgeFact { if (RequiredSize > Taints.size()) { Taints.resize(RequiredSize); } - Taints.setBitsInMask((const uint32_t *)&Facts, sizeof(Facts)); + Taints.setBitsInMask((const uint32_t *)&Facts, + sizeof(Facts) / sizeof(uint32_t)); } void unionWith(const IDEFeatureTaintEdgeFact &Facts) { if (Facts.isTop()) { From 72e654916203d10fa52e450072172f137eb7d70c Mon Sep 17 00:00:00 2001 From: Fabian Schiebel <52407375+fabianbs96@users.noreply.github.com> Date: Mon, 3 Mar 2025 20:20:55 +0100 Subject: [PATCH 31/33] Update flaky tests --- unittests/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 25388cf790..405738b1f8 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -16,13 +16,14 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux") if ("${UBUNTU_MAJOR_VERSION}" GREATER 22) # TODO tests shouldn't be flaky - list(APPEND additional_args -E "\"(LLVMBasedCFGTest|LLVMBasedICFGGlobCtorDtorTest|IDEInstInteractionAnalysisTest|IFDSUninitializedVariablesTest|IDEGeneralizedLCATest|IDEExtendedTaintAnalysisTest)\"") + list(APPEND additional_args -E "\"(LLVMBasedCFGTest|LLVMBasedICFGGlobCtorDtorTest|IDEInstInteractionAnalysisTest|IFDSUninitializedVariablesTest|IDEGeneralizedLCATest|IDEExtendedTaintAnalysisTest|IDEFeatureTaintAnalysis)\"") # LLVMBasedCFGTest.HandlesCppStandardType # IDEInstInteractionAnalysisTest.HandleBasicTest_04 # IFDSUninitializedVariablesTest.UninitTest_05_SHOULD_NOT_LEAK # .UninitTest_06_SHOULD_NOT_LEAK # IDEGeneralizedLCATest.StringTestCpp # 
IDEExtendedTaintAnalysisTest.XTaint09 + # IDEFeatureTaintAnalysis.HandleBasicTest_04, HandleGlobalTest_05 endif() endif() From 25a83c1bece6e0a32a2af1165ed3a72eb4368b64 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Tue, 4 Mar 2025 20:40:04 +0100 Subject: [PATCH 32/33] Fixes due to LLVM-15 IR --- .../Problems/IDEFeatureTaintAnalysisTest.cpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp index df7ca581a1..eaf0b2f5c6 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -486,10 +486,10 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleReturnTest_01) { TEST_F(IDEInstInteractionAnalysisTest, HandleHeapTest_01) { std::set GroundTruth; - GroundTruth.emplace("main", 19, "retval", std::set{"3"}); - GroundTruth.emplace("main", 19, "i", std::set{"6", "7"}); - GroundTruth.emplace("main", 19, "j", - std::set{"6", "7", "8", "10", "9"}); + GroundTruth.emplace("main", 17, "retval", std::set{"3"}); + GroundTruth.emplace("main", 17, "i", std::set{"5", "6"}); + GroundTruth.emplace("main", 17, "j", + std::set{"5", "6", "7", "8", "9"}); doAnalysisAndCompareResults("heap_01_cpp.ll", {"main"}, GroundTruth, false); } @@ -524,26 +524,26 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_03) { GroundTruth.emplace( "main", 19, "Str", - std::set{"40", "44", "47", "50", "52", "55", "64"}); + std::set{"39", "43", "46", "49", "51", "54", "63"}); GroundTruth.emplace("main", 19, "ref.tmp", - std::set{"14", "2", "20", "21", "24", "25", - "26", "28", "30", "33", "41", "46", - "47", "48"}); + std::set{"13", "19", "2", "20", "23", "24", + "25", "27", "29", "32", "40", "45", + "46", "47"}); GroundTruth.emplace("main", 19, "ref.tmp1", - std::set{"1", "14", "20", "21", "24", 
"25", - "26", "28", "30", "33", "49", "50", - "51"}); + std::set{"1", "13", "19", "20", "23", "24", + "25", "27", "29", "32", "48", "49", + "50"}); doAnalysisAndCompareResults("rvo_03_cpp.ll", {"main"}, GroundTruth, true); } TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_04) { std::set GroundTruth; - GroundTruth.emplace("main", 12, "retval", std::set{"16"}); + GroundTruth.emplace("main", 10, "retval", std::set{"14"}); GroundTruth.emplace( - "main", 12, "F", - std::set{"11", "14", "17", "18", "21", "4", "5"}); - GroundTruth.emplace("main", 12, "ref.tmp", - std::set{"11", "18", "4", "5"}); + "main", 10, "F", + std::set{"12", "15", "16", "17", "2", "3", "9"}); + GroundTruth.emplace("main", 10, "ref.tmp", + std::set{"16", "17", "2", "3", "9"}); doAnalysisAndCompareResults("rvo_04_cpp.ll", {"main"}, GroundTruth, true); } From 0f946ae91133f3b9b4165985874c0e3533372780 Mon Sep 17 00:00:00 2001 From: Sriteja Kummita Date: Wed, 12 Mar 2025 10:41:08 +0100 Subject: [PATCH 33/33] rename test fixture to IDEFeatureTaintAnalysisTest --- .../Problems/IDEFeatureTaintAnalysisTest.cpp | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp index eaf0b2f5c6..cf0d4ac181 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -33,7 +33,7 @@ static std::string printSet(const std::set &EdgeFact) { } /* ============== TEST FIXTURE ============== */ -class IDEInstInteractionAnalysisTest : public ::testing::Test { +class IDEFeatureTaintAnalysisTest : public ::testing::Test { protected: static constexpr auto PathToLlFiles = PHASAR_BUILD_SUBFOLDER("inst_interaction/"); @@ -206,7 +206,7 @@ class IDEInstInteractionAnalysisTest : public ::testing::Test { }; // Test Fixture 
-TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_01) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_01) { std::set GroundTruth; GroundTruth.emplace("main", 9, "i", std::set{"4"}); GroundTruth.emplace("main", 9, "j", @@ -215,7 +215,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_01) { doAnalysisAndCompareResults("basic_01_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_02) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_02) { std::set GroundTruth; GroundTruth.emplace("main", 24, "retval", std::set{"6"}); GroundTruth.emplace("main", 24, "argc.addr", std::set{"7"}); @@ -228,7 +228,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_02) { doAnalysisAndCompareResults("basic_02_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_03) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_03) { std::set GroundTruth; GroundTruth.emplace("main", 20, "retval", std::set{"3"}); GroundTruth.emplace("main", 20, "i", @@ -238,7 +238,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_03) { doAnalysisAndCompareResults("basic_03_cpp.ll", {"main"}, GroundTruth, false); } -PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_04) { +PHASAR_SKIP_TEST(TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_04) { // If we use libcxx this won't work since internal implementation is different LIBCPP_GTEST_SKIP; @@ -255,14 +255,14 @@ PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_04) { doAnalysisAndCompareResults("basic_04_cpp.ll", {"main"}, GroundTruth, false); }) -TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_05) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_05) { std::set GroundTruth; GroundTruth.emplace("main", 11, "i", std::set{"5", "7"}); GroundTruth.emplace("main", 11, "retval", std::set{"2"}); doAnalysisAndCompareResults("basic_05_cpp.ll", {"main"}, GroundTruth, false); } 
-TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_06) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_06) { std::set GroundTruth; GroundTruth.emplace("main", 19, "retval", std::set{"5"}); GroundTruth.emplace("main", 19, "i", std::set{"15", "6", "13"}); @@ -273,7 +273,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_06) { doAnalysisAndCompareResults("basic_06_cpp.ll", {"main"}, GroundTruth, true); } -TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_07) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_07) { std::set GroundTruth; GroundTruth.emplace("main", 15, "retval", std::set{"5"}); GroundTruth.emplace("main", 15, "argc.addr", std::set{"6"}); @@ -284,14 +284,14 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_07) { doAnalysisAndCompareResults("basic_07_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_08) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_08) { std::set GroundTruth; GroundTruth.emplace("main", 12, "retval", std::set{"2"}); GroundTruth.emplace("main", 12, "i", std::set{"9"}); doAnalysisAndCompareResults("basic_08_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_09) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_09) { std::set GroundTruth; GroundTruth.emplace("main", 10, "i", std::set{"4"}); GroundTruth.emplace("main", 10, "j", std::set{"4", "6", "7"}); @@ -299,14 +299,14 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_09) { doAnalysisAndCompareResults("basic_09_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_10) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_10) { std::set GroundTruth; GroundTruth.emplace("main", 6, "i", std::set{"3"}); GroundTruth.emplace("main", 6, "retval", std::set{"2"}); doAnalysisAndCompareResults("basic_10_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_11) 
{ +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_11) { std::set GroundTruth; GroundTruth.emplace("main", 20, "FeatureSelector", std::set{"5", "7", "8"}); @@ -314,7 +314,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleBasicTest_11) { doAnalysisAndCompareResults("basic_11_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_01) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_01) { std::set GroundTruth; GroundTruth.emplace("main", 14, "retval", std::set{"8"}); GroundTruth.emplace("main", 14, "i", std::set{"9"}); @@ -326,7 +326,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_01) { doAnalysisAndCompareResults("call_01_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_02) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_02) { std::set GroundTruth; GroundTruth.emplace("main", 13, "retval", std::set{"12"}); GroundTruth.emplace("main", 13, "i", std::set{"13"}); @@ -337,7 +337,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_02) { doAnalysisAndCompareResults("call_02_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_03) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_03) { std::set GroundTruth; GroundTruth.emplace("main", 10, "retval", std::set{"20"}); GroundTruth.emplace("main", 10, "i", std::set{"21"}); @@ -348,7 +348,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_03) { doAnalysisAndCompareResults("call_03_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_04) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_04) { std::set GroundTruth; GroundTruth.emplace("main", 20, "retval", std::set{"33"}); GroundTruth.emplace("main", 20, "i", std::set{"34"}); @@ -365,7 +365,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_04) { doAnalysisAndCompareResults("call_04_cpp.ll", {"main"}, GroundTruth, false); } 
-TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_05) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_05) { std::set GroundTruth; GroundTruth.emplace("main", 10, "retval", std::set{"8"}); GroundTruth.emplace("main", 10, "i", std::set{"3", "11", "9"}); @@ -373,7 +373,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_05) { doAnalysisAndCompareResults("call_05_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_06) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_06) { // NOTE: Here we are suffering from IntraProceduralAliasesOnly std::set GroundTruth; GroundTruth.emplace("main", 24, "retval", std::set{"11"}); @@ -392,14 +392,14 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_06) { doAnalysisAndCompareResults("call_06_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleCallTest_07) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_07) { std::set GroundTruth; GroundTruth.emplace("main", 6, "retval", std::set{"7"}); GroundTruth.emplace("main", 6, "VarIR", std::set{"6", "3", "8"}); doAnalysisAndCompareResults("call_07_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_01) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleGlobalTest_01) { std::set GroundTruth; GroundTruth.emplace("main", 9, "retval", std::set{"3"}); GroundTruth.emplace("main", 9, "i", std::set{"7"}); @@ -407,7 +407,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_01) { doAnalysisAndCompareResults("global_01_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_02) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleGlobalTest_02) { std::set GroundTruth; GroundTruth.emplace("_Z5initBv", 2, "a", std::set{"0"}); GroundTruth.emplace("_Z5initBv", 2, "b", std::set{"2"}); @@ -418,7 +418,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_02) { doAnalysisAndCompareResults("global_02_cpp.ll", 
{"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_03) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleGlobalTest_03) { std::set GroundTruth; GroundTruth.emplace("main", 1, "GlobalFeature", std::set{"0"}); GroundTruth.emplace("main", 2, "GlobalFeature", std::set{"0"}); @@ -426,7 +426,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_03) { doAnalysisAndCompareResults("global_03_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_04) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleGlobalTest_04) { std::set GroundTruth; GroundTruth.emplace("main", 1, "GlobalFeature", std::set{"0"}); GroundTruth.emplace("main", 2, "GlobalFeature", std::set{"0"}); @@ -438,7 +438,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_04) { doAnalysisAndCompareResults("global_04_cpp.ll", {"main", "_Z7doStuffi"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_05) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleGlobalTest_05) { std::set GroundTruth; // NOTE: Facts at init() should be empty, except for its own ID; @@ -453,7 +453,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleGlobalTest_05) { doAnalysisAndCompareResults("global_05_cpp.ll", {"main"}, GroundTruth, true); } -TEST_F(IDEInstInteractionAnalysisTest, KillTest_01) { +TEST_F(IDEFeatureTaintAnalysisTest, KillTest_01) { std::set GroundTruth; GroundTruth.emplace("main", 12, "retval", std::set{"4"}); GroundTruth.emplace("main", 12, "i", std::set{"5"}); @@ -463,7 +463,7 @@ TEST_F(IDEInstInteractionAnalysisTest, KillTest_01) { false); } -TEST_F(IDEInstInteractionAnalysisTest, KillTest_02) { +TEST_F(IDEFeatureTaintAnalysisTest, KillTest_02) { std::set GroundTruth; GroundTruth.emplace("main", 12, "retval", std::set{"6"}); GroundTruth.emplace("main", 12, "A", std::set{"0"}); @@ -473,7 +473,7 @@ TEST_F(IDEInstInteractionAnalysisTest, KillTest_02) { false); } -TEST_F(IDEInstInteractionAnalysisTest, 
HandleReturnTest_01) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleReturnTest_01) { std::set GroundTruth; GroundTruth.emplace("main", 6, "retval", std::set{"3"}); GroundTruth.emplace("main", 6, "localVar", std::set{"4"}); @@ -484,7 +484,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleReturnTest_01) { doAnalysisAndCompareResults("return_01_cpp.ll", {"main"}, GroundTruth, false); } -TEST_F(IDEInstInteractionAnalysisTest, HandleHeapTest_01) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleHeapTest_01) { std::set GroundTruth; GroundTruth.emplace("main", 17, "retval", std::set{"3"}); GroundTruth.emplace("main", 17, "i", std::set{"5", "6"}); @@ -493,7 +493,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleHeapTest_01) { doAnalysisAndCompareResults("heap_01_cpp.ll", {"main"}, GroundTruth, false); } -PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_01) { +PHASAR_SKIP_TEST(TEST_F(IDEFeatureTaintAnalysisTest, HandleRVOTest_01) { GTEST_SKIP() << "This test heavily depends on the used stdlib version. TODO: " "add a better one"; @@ -506,7 +506,7 @@ PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_01) { doAnalysisAndCompareResults("rvo_01_cpp.ll", {"main"}, GroundTruth, false); }) -PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_02) { +PHASAR_SKIP_TEST(TEST_F(IDEFeatureTaintAnalysisTest, HandleRVOTest_02) { GTEST_SKIP() << "This test heavily depends on the used stdlib version. 
TODO: " "add a better one"; @@ -519,7 +519,7 @@ PHASAR_SKIP_TEST(TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_02) { doAnalysisAndCompareResults("rvo_02_cpp.ll", {"main"}, GroundTruth, true); }) -TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_03) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleRVOTest_03) { std::set GroundTruth; GroundTruth.emplace( @@ -536,7 +536,7 @@ TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_03) { doAnalysisAndCompareResults("rvo_03_cpp.ll", {"main"}, GroundTruth, true); } -TEST_F(IDEInstInteractionAnalysisTest, HandleRVOTest_04) { +TEST_F(IDEFeatureTaintAnalysisTest, HandleRVOTest_04) { std::set GroundTruth; GroundTruth.emplace("main", 10, "retval", std::set{"14"}); GroundTruth.emplace(