diff --git a/.clang-tidy b/.clang-tidy index 4b361e8559..94ed69701b 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -59,7 +59,7 @@ CheckOptions: - key: readability-identifier-naming.ParameterIgnoredRegexp value: (d|d1|d2|d3|d4|d5|eP|f|n) - key: readability-identifier-naming.FunctionIgnoredRegexp - value: (try_emplace|from_json|to_json|equal_to|to_string|DToString|NToString|FToString|LToString) + value: (try_emplace|from_json|to_json|equal_to|to_string|DToString|NToString|FToString|LToString|hash_value) - key: cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor value: 1 - key: cppcoreguidelines-special-member-functions.AllowMissingMoveFunctions diff --git a/include/phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h b/include/phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h index 2d05cb54c4..e78af20707 100644 --- a/include/phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h +++ b/include/phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h @@ -22,25 +22,25 @@ namespace psr { /// hash_value(const EdgeFunctionTy&). /// /// This cache is *not* thread-safe. -template -class DefaultEdgeFunctionSingletonCache +template +class DefaultEdgeFunctionSingletonCacheImpl : public EdgeFunctionSingletonCache { public: - DefaultEdgeFunctionSingletonCache() noexcept = default; + DefaultEdgeFunctionSingletonCacheImpl() noexcept = default; - DefaultEdgeFunctionSingletonCache(const DefaultEdgeFunctionSingletonCache &) = - delete; - DefaultEdgeFunctionSingletonCache & - operator=(const DefaultEdgeFunctionSingletonCache &) = delete; + DefaultEdgeFunctionSingletonCacheImpl( + const DefaultEdgeFunctionSingletonCacheImpl &) = delete; + DefaultEdgeFunctionSingletonCacheImpl & + operator=(const DefaultEdgeFunctionSingletonCacheImpl &) = delete; - DefaultEdgeFunctionSingletonCache( - DefaultEdgeFunctionSingletonCache &&) noexcept = default; - DefaultEdgeFunctionSingletonCache & - operator=(DefaultEdgeFunctionSingletonCache &&) noexcept = delete; - ~DefaultEdgeFunctionSingletonCache() override = default; + DefaultEdgeFunctionSingletonCacheImpl( + DefaultEdgeFunctionSingletonCacheImpl &&) noexcept = default; + DefaultEdgeFunctionSingletonCacheImpl & + operator=(DefaultEdgeFunctionSingletonCacheImpl &&) noexcept = delete; + ~DefaultEdgeFunctionSingletonCacheImpl() override = default; [[nodiscard]] const void * - lookup(ByConstRef EF) const noexcept override { + lookup(const EdgeFunctionTy &EF) const noexcept override { return Cache.lookup(&EF); } @@ -50,13 +50,10 @@ class DefaultEdgeFunctionSingletonCache assert(Inserted); } - void erase(ByConstRef EF) noexcept override { - Cache.erase(&EF); - } + void erase(const EdgeFunctionTy &EF) noexcept override { Cache.erase(&EF); } template - [[nodiscard]] EdgeFunction - createEdgeFunction(ArgTys &&...Args) { + [[nodiscard]] EdgeFunction createEdgeFunction(ArgTys &&...Args) { return CachedEdgeFunction{ EdgeFunctionTy{std::forward(Args)...}, this}; } @@ -92,19 +89,29 @@ class DefaultEdgeFunctionSingletonCache llvm::DenseMap Cache; }; +template +class DefaultEdgeFunctionSingletonCache + : public DefaultEdgeFunctionSingletonCacheImpl< + EdgeFunctionTy, typename EdgeFunctionTy::l_t> { +public: + using DefaultEdgeFunctionSingletonCacheImpl< + EdgeFunctionTy, + typename EdgeFunctionTy::l_t>::DefaultEdgeFunctionSingletonCacheImpl; +}; + template class DefaultEdgeFunctionSingletonCache< EdgeFunctionTy, std::enable_if_t>> { public: [[nodiscard]] const void * - lookup(ByConstRef /*EF*/) const noexcept override { + lookup(const EdgeFunctionTy & /*EF*/) const noexcept override { return nullptr; } void insert(const EdgeFunctionTy * /*EF*/, const void * /*Mem*/) override { assert(false && "We should never go here"); } - void erase(ByConstRef /*EF*/) noexcept override { + void erase(const EdgeFunctionTy & /*EF*/) noexcept override { assert(false && "We should never go here"); } [[nodiscard]] EdgeFunction diff --git a/include/phasar/DataFlow/IfdsIde/EdgeFunctionSingletonCache.h b/include/phasar/DataFlow/IfdsIde/EdgeFunctionSingletonCache.h index 53e652c0fd..8b6c096633 100644 --- a/include/phasar/DataFlow/IfdsIde/EdgeFunctionSingletonCache.h +++ b/include/phasar/DataFlow/IfdsIde/EdgeFunctionSingletonCache.h @@ -46,7 +46,7 @@ template class EdgeFunctionSingletonCache { /// Checks whether the edge function EF is cached in this cache. Returns the /// cached entry if found, else nullptr. [[nodiscard]] virtual const void * - lookup(ByConstRef EF) const noexcept = 0; + lookup(const EdgeFunctionTy &EF) const noexcept = 0; /// Inserts the cache-entry Mem for the edge function *EF into the cache. /// Typically, EF points into the buffer pointed to by Mem. Both pointers are @@ -57,7 +57,7 @@ template class EdgeFunctionSingletonCache { /// Erases the cache-entry associated with the edge function EF from the /// cache. - virtual void erase(ByConstRef EF) noexcept = 0; + virtual void erase(const EdgeFunctionTy &EF) noexcept = 0; template [[nodiscard]] auto createEdgeFunction(ArgTys &&...Args) { diff --git a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h index 4537c278e2..c416571c2f 100644 --- a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h @@ -500,13 +500,13 @@ template class FlowFunctionTemplates { struct GenManyAndKillAllOthers final : public FlowFunction { GenManyAndKillAllOthers(Container &&GenValues, d_t FromValue) - : GenValues(std::move(GenValues)), FromValue(std::move(FromValue)) {} + : GenValues(std::move(GenValues)), FromValue(FromValue) { + this->GenValues.insert(std::move(FromValue)); + } container_type computeTargets(d_t Source) override { if (Source == FromValue) { - auto Ret = GenValues; - Ret.insert(std::move(Source)); - return Ret; + return GenValues; } return {}; } diff --git a/include/phasar/DataFlow/IfdsIde/InitialSeeds.h b/include/phasar/DataFlow/IfdsIde/InitialSeeds.h index 60e5ae23e3..db7ea654e7 100644 --- a/include/phasar/DataFlow/IfdsIde/InitialSeeds.h +++ b/include/phasar/DataFlow/IfdsIde/InitialSeeds.h @@ -11,6 +11,7 @@ #define PHASAR_DATAFLOW_IFDSIDE_INITIALSEEDS_H #include "phasar/Domain/BinaryDomain.h" +#include "phasar/Utils/Printer.h" #include "phasar/Utils/TypeTraits.h" #include "llvm/Support/Compiler.h" @@ -75,36 +76,13 @@ template class InitialSeeds { [[nodiscard]] GeneralizedSeeds getSeeds() && { return std::move(Seeds); } void dump(llvm::raw_ostream &OS = llvm::errs()) const { - - auto printNode = [&](auto &&Node) { // NOLINT - if constexpr (std::is_pointer_v && - is_llvm_printable_v>) { - OS << *Node; - } else { - OS << Node; - } - }; - - auto printFact = [&](auto &&Node) { // NOLINT - if constexpr (std::is_pointer_v && - is_llvm_printable_v>) { - OS << *Node; - } else { - OS << Node; - } - }; - OS << "======================== Initial Seeds ========================\n"; for (const auto &[Node, Facts] : Seeds) { - OS << "At "; - printNode(Node); - OS << "\n"; + OS << "At " << NToString(Node) << '\n'; for (const auto &[Fact, Value] : Facts) { - OS << "> "; - printFact(Fact); - OS << " --> \\." << Value << "\n"; + OS << "> " << DToString(Fact) << " --> \\." << LToString(Value) << '\n'; } - OS << "\n"; + OS << '\n'; } OS << "========================== End Seeds ==========================\n"; } diff --git a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h index 6a986d99c7..c92c35aae3 100644 --- a/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h +++ b/include/phasar/DataFlow/PathSensitivity/PathSensitivityManagerMixin.h @@ -26,6 +26,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/raw_ostream.h" #include @@ -33,10 +34,6 @@ #include #include -namespace llvm { -class DbgInfoIntrinsic; -} // namespace llvm - namespace psr { template class PathSensitivityManagerMixin { diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h index 4d17597b70..b4f7676f6f 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h @@ -11,6 +11,7 @@ #define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_LLVMSOLVERRESULTS_H #include "phasar/DataFlow/IfdsIde/SolverResults.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Utils/JoinLattice.h" #include "phasar/Utils/Logger.h" diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h new file mode 100644 index 0000000000..9cca9dc15e --- /dev/null +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h @@ -0,0 +1,377 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEFEATURETAINTANALYSIS_H +#define PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEFEATURETAINTANALYSIS_H + +#include "phasar/DataFlow/IfdsIde/DefaultEdgeFunctionSingletonCache.h" +#include "phasar/DataFlow/IfdsIde/EdgeFunction.h" +#include "phasar/DataFlow/IfdsIde/IDETabulationProblem.h" +#include "phasar/PhasarLLVM/ControlFlow/Resolver/Resolver.h" +#include "phasar/PhasarLLVM/Domain/LLVMAnalysisDomain.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/Utils/BitVectorSet.h" +#include "phasar/Utils/JoinLattice.h" +#include "phasar/Utils/Printer.h" +#include "phasar/Utils/TypeTraits.h" + +#include "llvm/ADT/FunctionExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" + +#include +#include +#include +#include +#include + +namespace llvm { +class GlobalVariable; +} // namespace llvm + +namespace psr { +class LLVMProjectIRDB; + +struct IDEFeatureTaintEdgeFact { + llvm::SmallBitVector Taints; + + [[nodiscard]] static llvm::SmallBitVector fromBits(uintptr_t Bits) { +#if __has_builtin(__builtin_constant_p) + if (__builtin_constant_p(Bits) && Bits == 0) { + return {}; + } +#endif + + llvm::SmallBitVector Ret(llvm::findLastSet(Bits) + 1); + Ret.setBitsInMask((const uint32_t *)&Bits, sizeof(Bits)); + return Ret; + } + + IDEFeatureTaintEdgeFact(llvm::SmallBitVector &&Taints) noexcept + : Taints(std::move(Taints)) {} + IDEFeatureTaintEdgeFact(const llvm::SmallBitVector &Taints) + : Taints(Taints) {} + IDEFeatureTaintEdgeFact(uintptr_t Taints) noexcept + : Taints(fromBits(Taints)) {} + explicit IDEFeatureTaintEdgeFact() noexcept { Taints.invalid(); } + + void unionWith(uintptr_t Facts) { + auto RequiredSize = llvm::findLastSet(Facts) + 1; + if (RequiredSize > Taints.size()) { + Taints.resize(RequiredSize); + } + Taints.setBitsInMask((const uint32_t *)&Facts, + sizeof(Facts) / sizeof(uint32_t)); + } + void unionWith(const IDEFeatureTaintEdgeFact &Facts) { + if (Facts.isTop()) { + return; + } + Taints |= Facts.Taints; + } + + /// Checks whether this set contains no facts. + /// Note: Top also counts as empty + [[nodiscard]] inline bool empty() const noexcept { + return isTop() || Taints.none(); + } + /// Returns the number of facts in this set. + /// Note: Top counts as empty + [[nodiscard]] inline size_t size() const noexcept { + if (isTop()) { + return 0; + } + return Taints.count(); + } + + [[nodiscard]] inline bool isBottom() const noexcept { return Taints.empty(); } + [[nodiscard]] inline bool isTop() const noexcept { + return Taints.isInvalid(); + } + + [[nodiscard]] friend llvm::hash_code + hash_value(const IDEFeatureTaintEdgeFact &BV) noexcept { + if (BV.Taints.empty()) { + return {}; + } + uintptr_t Buf; + auto Words = BV.Taints.getData(Buf); + size_t Idx = Words.size(); + while (Idx && Words[Idx - 1] == 0) { + --Idx; + } + return llvm::hash_combine_range(Words.begin(), + std::next(Words.begin(), Idx)); + } + + friend bool operator==(const IDEFeatureTaintEdgeFact &Lhs, + const IDEFeatureTaintEdgeFact &Rhs) noexcept { + bool LeftEmpty = Lhs.Taints.none(); + bool RightEmpty = Rhs.Taints.none(); + if (LeftEmpty || RightEmpty) { + return LeftEmpty == RightEmpty; + } + // Check, whether Lhs and Rhs actually have the same bits set and not + // whether their internal representation is exactly identitcal + + uintptr_t LBuf, RBuf; + auto LhsWords = Lhs.Taints.getData(LBuf); + auto RhsWords = Rhs.Taints.getData(RBuf); + if (LhsWords.size() == RhsWords.size()) { + return LhsWords == RhsWords; + } + auto MinSize = std::min(LhsWords.size(), RhsWords.size()); + if (LhsWords.slice(0, MinSize) != RhsWords.slice(0, MinSize)) { + return false; + } + auto Rest = (LhsWords.size() > RhsWords.size() ? LhsWords : RhsWords) + .slice(MinSize); + return std::all_of(Rest.begin(), Rest.end(), + [](auto Word) { return Word == 0; }); + } + + friend bool operator!=(const IDEFeatureTaintEdgeFact &Lhs, + const IDEFeatureTaintEdgeFact &Rhs) noexcept { + return !(Lhs == Rhs); + } + + template [[nodiscard]] std::string str() const { + auto BV = BitVectorSet::fromBits(Taints); + return LToString(BV); + } + + template [[nodiscard]] auto toBVSet() const { + if (isTop()) { + return BitVectorSet(); + } + return BitVectorSet::fromBits(Taints); + } + + template [[nodiscard]] auto toSet() const { + std::set Ret; + if (isTop()) { + return Ret; + } + + for (const auto &Elem : this->template toBVSet()) { + Ret.insert(Elem); + } + return Ret; + } +}; + +[[nodiscard]] std::string LToString(const IDEFeatureTaintEdgeFact &EdgeFact); + +template <> struct JoinLatticeTraits { + inline static IDEFeatureTaintEdgeFact top() { + IDEFeatureTaintEdgeFact Ret{}; + return Ret; + } + inline static IDEFeatureTaintEdgeFact bottom() { + // TODO + return 0; + } + inline static IDEFeatureTaintEdgeFact join(const IDEFeatureTaintEdgeFact &L, + const IDEFeatureTaintEdgeFact &R) { + if (L.isTop()) { + return R; + } + if (R.isTop()) { + return L; + } + auto Ret = L; + Ret.Taints |= R.Taints; + return Ret; + } +}; + +struct IDEFeatureTaintAnalysisDomain : LLVMAnalysisDomainDefault { + using l_t = IDEFeatureTaintEdgeFact; +}; + +class FeatureTaintGenerator { +public: + using InstOrGlobal = + std::variant; + + using GenerateTaintsFn = + llvm::unique_function; + using IsSourceFn = llvm::unique_function; + using PrinterFn = + llvm::unique_function; + + template + static GenerateTaintsFn createGenerateTaints(EdgeFactGenerator &&EFGen) { + return [EFGen{std::forward(EFGen)}](InstOrGlobal IG) { + const auto &TaintSet = std::invoke(EFGen, IG); + BitVectorSet, llvm::SmallBitVector> BV( + llvm::adl_begin(TaintSet), llvm::adl_end(TaintSet)); + + return IDEFeatureTaintEdgeFact{std::move(BV).getBits()}; + }; + } + + template + static PrinterFn createEdgeFactPrinter() { + using ElemTy = + ElementType>; + return [](const IDEFeatureTaintEdgeFact &Fact) { + auto BV = + BitVectorSet::fromBits(Fact.Taints); + return LToString(BV); + }; + } + + FeatureTaintGenerator(IsSourceFn IsFeatureSource, + GenerateTaintsFn GenerateTaints, PrinterFn Printer) + : IsFeatureSource(std::move(IsFeatureSource)), + GenerateTaints(std::move(GenerateTaints)), Printer(std::move(Printer)) { + } + + template + FeatureTaintGenerator(SourceDetector &&SrcDetector, EdgeFactGenerator &&EFGen) + : IsFeatureSource(std::forward(SrcDetector)), + GenerateTaints( + createGenerateTaints(std::forward(EFGen))), + Printer(createEdgeFactPrinter()) {} + + template >>> + FeatureTaintGenerator(EdgeFactGenerator &&EFGen) + : FeatureTaintGenerator( + [EFGen](InstOrGlobal IG) { + return !llvm::empty(std::invoke(EFGen, IG)); + }, + std::forward(EFGen)) {} + + [[nodiscard]] bool isSource(InstOrGlobal IG) const { + return IsFeatureSource(IG); + } + + [[nodiscard]] IDEFeatureTaintEdgeFact + getGeneratedTaintsAt(InstOrGlobal IG) const { + return GenerateTaints(IG); + } + + [[nodiscard]] std::string + toString(const IDEFeatureTaintEdgeFact &Fact) const { + return Printer(Fact); + } + +private: + IsSourceFn IsFeatureSource; + GenerateTaintsFn GenerateTaints; + PrinterFn Printer; +}; + +class IDEFeatureTaintAnalysis + : public IDETabulationProblem { + +public: + IDEFeatureTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, + std::vector EntryPoints, + FeatureTaintGenerator &&TaintGen); + + template + IDEFeatureTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, + std::vector EntryPoints, + EdgeFactGenerator &&EFGen) + : IDEFeatureTaintAnalysis( + IRDB, PT, std::move(EntryPoints), + FeatureTaintGenerator(std::forward(EFGen))) {} + + template + IDEFeatureTaintAnalysis(const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, + std::vector EntryPoints, + SourceDetector &&SrcDetector, + EdgeFactGenerator &&EFGen) + : IDEFeatureTaintAnalysis( + IRDB, PT, std::move(EntryPoints), + FeatureTaintGenerator(std::forward(SrcDetector), + std::forward(EFGen))) {} + + IDEFeatureTaintAnalysis(const IDEFeatureTaintAnalysis &) = delete; + IDEFeatureTaintAnalysis &operator=(const IDEFeatureTaintAnalysis &) = delete; + + IDEFeatureTaintAnalysis(IDEFeatureTaintAnalysis &&) noexcept = default; + IDEFeatureTaintAnalysis & + operator=(IDEFeatureTaintAnalysis &&) noexcept = delete; + + // The EF Caches are incomplete, so move the dtor into the .cpp + ~IDEFeatureTaintAnalysis() override; + + ////////////////////////////////////////////////////////////////////////////// + /// Flow Functions + ////////////////////////////////////////////////////////////////////////////// + + FlowFunctionPtrType getNormalFlowFunction(n_t Curr, n_t Succ) override; + + FlowFunctionPtrType getCallFlowFunction(n_t CallSite, f_t DestFun) override; + + FlowFunctionPtrType getRetFlowFunction(n_t CallSite, f_t CalleeFun, + n_t ExitInst, n_t RetSite) override; + FlowFunctionPtrType + getCallToRetFlowFunction(n_t CallSite, n_t RetSite, + llvm::ArrayRef Callees) override; + FlowFunctionPtrType getSummaryFlowFunction(n_t CallSite, + f_t DestFun) override; + + ////////////////////////////////////////////////////////////////////////////// + /// Edge Functions + ////////////////////////////////////////////////////////////////////////////// + + EdgeFunction getNormalEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, + d_t SuccNode) override; + + EdgeFunction getCallEdgeFunction(n_t CallSite, d_t SrcNode, + f_t DestinationFunction, + d_t DestNode) override; + + EdgeFunction getReturnEdgeFunction(n_t CallSite, f_t CalleeFunction, + n_t ExitStmt, d_t ExitNode, + n_t RetSite, d_t RetNode) override; + + EdgeFunction + getCallToRetEdgeFunction(n_t CallSite, d_t CallNode, n_t RetSite, + d_t RetSiteNode, + llvm::ArrayRef Callees) override; + + EdgeFunction getSummaryEdgeFunction(n_t Curr, d_t CurrNode, n_t Succ, + d_t SuccNode) override; + + ////////////////////////////////////////////////////////////////////////////// + /// Misc + ////////////////////////////////////////////////////////////////////////////// + + InitialSeeds initialSeeds() override; + + bool isZeroValue(d_t FlowFact) const noexcept override; + + void emitTextReport(GenericSolverResults SR, + llvm::raw_ostream &OS = llvm::outs()) override; + + EdgeFunction extend(const EdgeFunction &FirstEF, + const EdgeFunction &SecondEF) override; + EdgeFunction combine(const EdgeFunction &FirstEF, + const EdgeFunction &OtherEF) override; + +private: + FeatureTaintGenerator TaintGen; + LLVMAliasInfoRef PT; + + struct GenerateEF; + struct AddFactsEF; + DefaultEdgeFunctionSingletonCacheImpl GenEFCache; + DefaultEdgeFunctionSingletonCacheImpl AddEFCache; +}; + +} // namespace psr + +#endif // PHASAR_PHASARLLVM_DATAFLOW_IFDSIDE_PROBLEMS_IDEFEATURETAINTANALYSIS_H diff --git a/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def b/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def index 564fb245b7..5c11905fc1 100644 --- a/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def +++ b/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def @@ -23,6 +23,7 @@ DATA_FLOW_ANALYSIS_TYPES(IFDSSolverTest, "ifds-solvertest", "Empty analysis. Jus DATA_FLOW_ANALYSIS_TYPES(IDELinearConstantAnalysis, "ide-lca", "Simple linear constant propagation") DATA_FLOW_ANALYSIS_TYPES(IDESolverTest, "ide-solvertest", "Empty analysis. Just to see that the IDE solver works") DATA_FLOW_ANALYSIS_TYPES(IDEInstInteractionAnalysis, "ide-iia", "Which instruction has influence on which other instructions?") +DATA_FLOW_ANALYSIS_TYPES(IDEFeatureTaintAnalysis, "ide-fiia", "Which instruction has influence on which other instructions?") DATA_FLOW_ANALYSIS_TYPES(IntraMonoFullConstantPropagation, "intra-mono-fca", "Simple constant propagation without the restriction to linear binary operations. Only works inTRA-procedurally") DATA_FLOW_ANALYSIS_TYPES(IntraMonoSolverTest, "intra-mono-solvertest", "Empty analysis. Just to see that the intraprocedural monotone solver works") DATA_FLOW_ANALYSIS_TYPES(InterMonoSolverTest, "inter-mono-solvertest", "Empty analysis. Just to see that the interprocedural monotone solver works") diff --git a/include/phasar/Utils/BitVectorSet.h b/include/phasar/Utils/BitVectorSet.h index d4ffce45df..c64f8f41bf 100644 --- a/include/phasar/Utils/BitVectorSet.h +++ b/include/phasar/Utils/BitVectorSet.h @@ -12,6 +12,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" @@ -20,16 +21,18 @@ #include #include +#include #include namespace psr { namespace internal { + inline bool isLess(const llvm::BitVector &Lhs, const llvm::BitVector &Rhs) { unsigned LhsBits = Lhs.size(); unsigned RhsBits = Rhs.size(); if (LhsBits > RhsBits) { - if (Lhs.find_first_in(RhsBits, LhsBits) != -1) { + if (Lhs.find_next(RhsBits) != -1) { return false; } } else if (LhsBits < RhsBits) { @@ -47,6 +50,15 @@ inline bool isLess(const llvm::BitVector &Lhs, const llvm::BitVector &Rhs) { } return false; } + +inline llvm::ArrayRef getWords(const llvm::BitVector &BV, + uintptr_t & /*Store*/) { + return BV.getData(); +} +inline llvm::ArrayRef getWords(const llvm::SmallBitVector &BV, + uintptr_t &Store) { + return BV.getData(Store); +} } // namespace internal /** @@ -56,19 +68,22 @@ inline bool isLess(const llvm::BitVector &Lhs, const llvm::BitVector &Rhs) { * * @brief Implements a set that requires minimal space. */ -template class BitVectorSet { -private: +template +class BitVectorSet { +public: // Using boost::hash causes ambiguity for hash_value(): // - // - // - using bimap_t = boost::bimap>, boost::bimaps::unordered_set_of>; + +private: inline static bimap_t Position; // NOLINT - llvm::BitVector Bits; + BitVectorTy Bits; template class BitVectorSetIterator { - llvm::BitVector Bits; + BitVectorTy Bits; public: using iterator_category = std::forward_iterator_tag; @@ -83,7 +98,7 @@ template class BitVectorSet { return *this; } - void setBits(const llvm::BitVector &OtherBits) { Bits = OtherBits; } + void setBits(const BitVectorTy &OtherBits) { Bits = OtherBits; } bool operator==(const BitVectorSetIterator &OtherIterator) const { return PosPtr == OtherIterator.getPtr(); @@ -152,7 +167,7 @@ template class BitVectorSet { // T getVal() {return pos_ptr->second;} - [[nodiscard]] llvm::BitVector getBits() const { return Bits; } + [[nodiscard]] const BitVectorTy &getBits() const { return Bits; } private: D PosPtr; @@ -176,6 +191,12 @@ template class BitVectorSet { insert(First, Last); } + static BitVectorSet fromBits(BitVectorTy Bits) { + BitVectorSet Ret; + Ret.Bits = std::move(Bits); + return Ret; + } + [[nodiscard]] BitVectorSet setUnion(const BitVectorSet &Other) const { const bool ThisSetIsSmaller = Bits.size() < Other.Bits.size(); BitVectorSet Res = ThisSetIsSmaller ? Other : *this; @@ -271,6 +292,10 @@ template class BitVectorSet { [[nodiscard]] size_t size() const noexcept { return Bits.count(); } + [[nodiscard]] const BitVectorTy &getBits() const &noexcept { return Bits; } + [[nodiscard]] BitVectorTy &getBits() &noexcept { return Bits; } + [[nodiscard]] BitVectorTy &&getBits() &&noexcept { return std::move(Bits); } + friend bool operator==(const BitVectorSet &Lhs, const BitVectorSet &Rhs) { bool LeftEmpty = Lhs.empty(); bool RightEmpty = Rhs.empty(); @@ -279,8 +304,10 @@ template class BitVectorSet { } // Check, whether Lhs and Rhs actually have the same bits set and not // whether their internal representation is exactly identitcal - auto LhsWords = Lhs.Bits.getData(); - auto RhsWords = Rhs.Bits.getData(); + + uintptr_t LStore{}, RStore{}; + auto LhsWords = internal::getWords(Lhs.Bits, LStore); + auto RhsWords = internal::getWords(Rhs.Bits, RStore); if (LhsWords.size() == RhsWords.size()) { return LhsWords == RhsWords; } @@ -307,7 +334,8 @@ template class BitVectorSet { if (BV.Bits.empty()) { return {}; } - auto Words = BV.Bits.getData(); + uintptr_t Store{}; + auto Words = internal::getWords(BV.Bits, Store); size_t Idx = Words.size(); while (Idx && Words[Idx - 1] == 0) { --Idx; diff --git a/include/phasar/Utils/TypeTraits.h b/include/phasar/Utils/TypeTraits.h index e70ea32f89..7f0023af2f 100644 --- a/include/phasar/Utils/TypeTraits.h +++ b/include/phasar/Utils/TypeTraits.h @@ -17,6 +17,7 @@ #include "nlohmann/json.hpp" +#include #include #include #include @@ -174,6 +175,12 @@ struct variant_idx, T> size_t, std::variant...>(type_identity{}).index()> {}; +template struct ElementType { + using IteratorTy = + std::decay_t()))>; + using type = typename std::iterator_traits::value_type; +}; + template struct has_isInteresting : std::false_type {}; // NOLINT template @@ -275,6 +282,8 @@ template using type_identity_t = typename type_identity::type; template static constexpr size_t variant_idx = detail::variant_idx::value; +template +using ElementType = typename detail::ElementType::type; template struct has_getAsJson : std::false_type {}; // NOLINT template diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp new file mode 100644 index 0000000000..1941ba9ef6 --- /dev/null +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.cpp @@ -0,0 +1,715 @@ +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h" + +#include "phasar/DataFlow/IfdsIde/EdgeFunction.h" +#include "phasar/DataFlow/IfdsIde/EdgeFunctionUtils.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMFlowFunctions.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMZeroValue.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/ByRef.h" +#include "phasar/Utils/Printer.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include + +using namespace psr; + +using l_t = IDEFeatureTaintAnalysisDomain::l_t; +using d_t = IDEFeatureTaintAnalysisDomain::d_t; + +IDEFeatureTaintAnalysis::IDEFeatureTaintAnalysis( + const LLVMProjectIRDB *IRDB, LLVMAliasInfoRef PT, + std::vector EntryPoints, FeatureTaintGenerator &&TaintGen) + : IDETabulationProblem( + IRDB, std::move(EntryPoints), LLVMZeroValue::getInstance()), + TaintGen(std::move(TaintGen)), PT(PT) {} + +IDEFeatureTaintAnalysis::~IDEFeatureTaintAnalysis() = default; + +std::string psr::LToString(const IDEFeatureTaintEdgeFact &EdgeFact) { + std::string Ret; + llvm::raw_string_ostream ROS(Ret); + ROS << '<'; + llvm::interleaveComma(EdgeFact.Taints.set_bits(), ROS); + ROS << '>'; + return Ret; +} + +static bool isMustAlias(const llvm::Value *Val1, + const llvm::Value *Val2) noexcept { + + const auto *Base1 = Val1->stripPointerCastsAndAliases(); + const auto *Base2 = Val2->stripPointerCastsAndAliases(); + if (Base1 == Base2) { + return true; + } + + // Note: We are not field-sensitive + + const auto *Load1 = llvm::dyn_cast(Base1); + if (Load1 && + Load1->getPointerOperand()->stripPointerCastsAndAliases() == Base2) { + return true; + } + + const auto *Load2 = llvm::dyn_cast(Base2); + if (Load2 && + Load2->getPointerOperand()->stripPointerCastsAndAliases() == Base1) { + return true; + } + if (Load1 && Load2 && + Load1->getPointerOperand() == Load2->getPointerOperand()) { + return true; + } + + // XXX: handle more cases + + return false; +} + +static bool canKillPointerOp(const llvm::Value *PointerOp, + const llvm::Value *Src, + const IDEFeatureTaintAnalysis::container_type + &PointerOpMayAliases) noexcept { + if (PointerOp == Src || isMustAlias(PointerOp, Src)) { + return true; + } + + // For precision, we may want to kill some facts unsoundly + + if (llvm::isa(Src) || + llvm::isa(PointerOp)) { + return PointerOpMayAliases.count(Src); + } + + return false; +} + +static auto getStoreFF(bool GeneratesFact, LLVMAliasInfoRef PT, + const llvm::Instruction *Inst, const llvm::Value *Dest, + const llvm::Value *Value) { + + using container_type = IDEFeatureTaintAnalysis::container_type; + + auto PointerPTS = PT.getReachableAllocationSites(Dest, true, Inst); + container_type PointerRet(PointerPTS->begin(), PointerPTS->end()); + PointerRet.insert(Dest); + + auto ValuePTS = PT.getReachableAllocationSites(Value, true, Inst); + + return FlowFunctionTemplates::lambdaFlow( + [Dest, Value, PointerRet = std::move(PointerRet), + ValuePTS = std::move(ValuePTS), + GeneratesFact](d_t Src) -> container_type { + if (Dest == Src || (PointerRet.count(Src) && + canKillPointerOp(Dest, Src, PointerRet))) { + return {}; + } + container_type Facts = [&] { + // y/Y now obtains its new value(s) from x/X + // If a value is stored that holds we must generate all potential + // memory locations the store might write to. + // ... or from zero, if we manually generate a fact here + if (Value == Src || + (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src)) || + ValuePTS->count(Src)) { + return PointerRet; + } + + return container_type(); + }(); + + Facts.insert(Src); + return Facts; + }); +} + +auto IDEFeatureTaintAnalysis::getNormalFlowFunction(n_t Curr, n_t /* Succ */) + -> FlowFunctionPtrType { + bool GeneratesFact = TaintGen.isSource(Curr); + + if (const auto *Alloca = llvm::dyn_cast(Curr)) { + if (GeneratesFact) { + return generateFromZero(Alloca); + } + return identityFlow(); + } + + if (const auto *Load = llvm::dyn_cast(Curr)) { + return lambdaFlow( + [GeneratesFact, Load, PointerOp = Load->getPointerOperand(), + PTS = PT.getReachableAllocationSites(Load->getPointerOperand(), true)]( + d_t Source) -> container_type { + bool GenFromZero = + GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Source); + + if (GenFromZero || Source == PointerOp || PTS->count(Source)) { + return {Source, Load}; + } + + return {Source}; + }); + } + + if (const auto *Store = llvm::dyn_cast(Curr)) { + return getStoreFF(GeneratesFact, PT, Store, Store->getPointerOperand(), + Store->getValueOperand()); + } + + // Fallback + return lambdaFlow([Inst = Curr, GeneratesFact](d_t Src) { + container_type Facts; + Facts.insert(Src); + if (LLVMZeroValue::isLLVMZeroValue(Src)) { + if (GeneratesFact) { + Facts.insert(Inst); + } + return Facts; + } + + // continue syntactic propagation: populate and propagate other existing + // facts + for (const auto &Op : Inst->operands()) { + // if one of the operands holds, also generate the instruction using + // it + if (Op == Src) { + Facts.insert(Inst); + } + } + return Facts; + }); +} + +auto IDEFeatureTaintAnalysis::getCallFlowFunction(n_t CallSite, f_t DestFun) + -> FlowFunctionPtrType { + + if (DestFun->isDeclaration()) { + // We don't have anything that we could analyze, kill all facts. + return killAllFlows(); + } + + const auto *CS = llvm::cast(CallSite); + + // Map actual to formal parameters. + auto MapFactsToCalleeFF = mapFactsToCallee( + CS, DestFun, [CS](const llvm::Value *ActualArg, ByConstRef Src) { + if (ActualArg != Src && + ActualArg->stripPointerCastsAndAliases() != Src) { + return false; + } + + if (CS->hasStructRetAttr() && ActualArg == CS->getArgOperand(0)) { + return false; + } + + return true; + }); + + return MapFactsToCalleeFF; +} + +auto IDEFeatureTaintAnalysis::getRetFlowFunction(n_t CallSite, + f_t /*CalleeFun*/, + n_t ExitInst, + n_t /* RetSite */) + -> FlowFunctionPtrType { + // Map return value back to the caller. If pointer parameters hold at the + // end of a callee function generate all of those in the caller context. + if (CallSite == nullptr) { + return killAllFlows(); + } + + const auto *RetInst = llvm::dyn_cast(ExitInst); + auto *RetVal = RetInst ? RetInst->getReturnValue() : nullptr; + bool GeneratesFact = llvm::isa_and_nonnull(RetVal) && + TaintGen.isSource(ExitInst); + return mapFactsToCaller( + llvm::cast(CallSite), ExitInst, {}, + [GeneratesFact](const llvm::Value *RetVal, d_t Src) { + if (Src == RetVal) { + return true; + } + if (GeneratesFact && LLVMZeroValue::isLLVMZeroValue(Src)) { + return true; + } + return false; + }); +} + +auto IDEFeatureTaintAnalysis::getCallToRetFlowFunction( + n_t CallSite, n_t /* RetSite */, llvm::ArrayRef Callees) + -> FlowFunctionPtrType { + + const auto *Call = llvm::cast(CallSite); + + // Assume, the sret param is at index 0 + const llvm::Value *RetVal = + !Call->getType()->isVoidTy() + ? CallSite + : (Call->hasStructRetAttr() ? Call->getArgOperand(0) : nullptr); + + bool GeneratesFact = RetVal && TaintGen.isSource(CallSite); + + if (llvm::all_of(Callees, [](f_t Fun) { return Fun->isDeclaration(); })) { + if (GeneratesFact) { + return generateFromZero(RetVal); + } + return identityFlow(); + } + + auto Mapper = mapFactsAlongsideCallSite( + Call, + [RetVal](d_t Arg) { + if (RetVal == Arg) { + // perform strong update here + return false; + } + + return true; + }, + /*PropagateGlobals*/ false); + + if (GeneratesFact) { + return unionFlows(std::move(Mapper), + generateFlowAndKillAllOthers(RetVal, getZeroValue())); + } + return Mapper; +} + +auto IDEFeatureTaintAnalysis::getSummaryFlowFunction(n_t CallSite, + f_t /*DestFun*/) + -> FlowFunctionPtrType { + if (const auto *MemTrn = llvm::dyn_cast(CallSite)) { + return getStoreFF(TaintGen.isSource(CallSite), PT, MemTrn, + MemTrn->getDest(), MemTrn->getSource()); + } + if (const auto *MemSet = llvm::dyn_cast(CallSite)) { + return getStoreFF(TaintGen.isSource(CallSite), PT, MemSet, + MemSet->getDest(), MemSet->getValue()); + } + + return nullptr; +} + +struct IDEFeatureTaintAnalysis::AddFactsEF { + using l_t = IDEFeatureTaintAnalysisDomain::l_t; + + IDEFeatureTaintEdgeFact Facts; + + [[nodiscard]] l_t computeTarget(l_t Source) const { + Source.Taints |= Facts.Taints; + return Source; + } + + static EdgeFunction + compose(EdgeFunctionRef /*This*/, + const EdgeFunction & /*SecondFunction*/) { + llvm::report_fatal_error("Implemented in 'extend'"); + } + + static EdgeFunction join(EdgeFunctionRef /*This*/, + const EdgeFunction & /*OtherFunction*/) { + llvm::report_fatal_error("Implemented in 'combine'"); + } + + friend bool operator==(const AddFactsEF &L, const AddFactsEF &R) { + return L.Facts == R.Facts; + } + + friend llvm::hash_code hash_value(const AddFactsEF &EF) { + return hash_value(EF.Facts); + } +}; + +struct IDEFeatureTaintAnalysis::GenerateEF { + using l_t = IDEFeatureTaintAnalysisDomain::l_t; + + IDEFeatureTaintEdgeFact Facts; + + [[nodiscard]] bool isConstant() const noexcept { return true; } + + [[nodiscard]] l_t computeTarget(ByConstRef /*Source*/) const { + return Facts; + } + + static EdgeFunction + compose(EdgeFunctionRef /*This*/, + const EdgeFunction & /*SecondFunction*/) { + llvm::report_fatal_error("Implemented in 'extend'"); + } + + static EdgeFunction join(EdgeFunctionRef /*This*/, + const EdgeFunction & /*OtherFunction*/) { + llvm::report_fatal_error("Implemented in 'combine'"); + } + + constexpr friend bool operator==(const GenerateEF &L, const GenerateEF &R) { + return L.Facts == R.Facts; + } + + constexpr friend llvm::hash_code hash_value(const GenerateEF &EF) { + return hash_value(EF.Facts); + } +}; + +namespace { +struct AddSmallFactsEF { + using l_t = IDEFeatureTaintAnalysisDomain::l_t; + + uintptr_t Facts{}; + + [[nodiscard]] l_t computeTarget(l_t Source) const { + Source.unionWith(Facts); + return Source; + } + + static EdgeFunction + compose(EdgeFunctionRef /*This*/, + const EdgeFunction & /*SecondFunction*/) { + llvm::report_fatal_error("Implemented in 'extend'"); + } + + static EdgeFunction join(EdgeFunctionRef /*This*/, + const EdgeFunction & /*OtherFunction*/) { + llvm::report_fatal_error("Implemented in 'combine'"); + } + + // NOLINTNEXTLINE -- unused function -- must satisfy the EF interface + friend bool operator==(const AddSmallFactsEF &L, const AddSmallFactsEF &R) { + return L.Facts == R.Facts; + } + + // NOLINTNEXTLINE -- unused function -- must satisfy the EF interface + friend llvm::hash_code hash_value(const AddSmallFactsEF &EF) { + return llvm::hash_value(EF.Facts); + } +}; + +struct GenerateSmallEF { + using l_t = IDEFeatureTaintAnalysisDomain::l_t; + + uintptr_t Facts{}; + + [[nodiscard]] bool isConstant() const noexcept { return true; } + + [[nodiscard]] l_t computeTarget(ByConstRef /*Source*/) const { + return Facts; + } + + static EdgeFunction + compose(EdgeFunctionRef /*This*/, + const EdgeFunction & /*SecondFunction*/) { + llvm::report_fatal_error("Implemented in 'extend'"); + } + + static EdgeFunction join(EdgeFunctionRef /*This*/, + const EdgeFunction & /*OtherFunction*/) { + llvm::report_fatal_error("Implemented in 'combine'"); + } + + // NOLINTNEXTLINE -- unused function -- must satisfy the EF interface + friend llvm::hash_code hash_value(const GenerateSmallEF &EF) { + return llvm::hash_value(EF.Facts); + } + + // NOLINTNEXTLINE -- unused function -- must satisfy the EF interface + friend bool operator==(GenerateSmallEF L, GenerateSmallEF R) { + return L.Facts == R.Facts; + } + + // NOLINTNEXTLINE -- unused function -- used in EdgeFunction + friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + GenerateSmallEF EF) { + return OS << "GenerateSmallEF" << LToString(EF.computeTarget(0)); + } +}; + +/// + +template +EdgeFunction genEF(l_t &&Facts, CacheT &GenEFCache) { + if (Facts.Taints.isSmall()) { + uintptr_t Buf{}; + std::ignore = Facts.Taints.getData(Buf); + return GenerateSmallEF{Buf}; + } + return GenEFCache.createEdgeFunction(std::move(Facts)); +} + +template +EdgeFunction addEF(l_t &&Facts, CacheT &AddEFCache) { + if (Facts.Taints.isSmall()) { + uintptr_t Buf{}; + std::ignore = Facts.Taints.getData(Buf); + return AddSmallFactsEF{Buf}; + } + return AddEFCache.createEdgeFunction(std::move(Facts)); +} + +template +std::pair +extractFacts(const EdgeFunction &EF) { + if (const auto *GenEF = llvm::dyn_cast(EF)) { + return {GenEF->Facts, nullptr}; + } + if (const auto *AddEF = llvm::dyn_cast(EF)) { + return {AddEF->Facts, nullptr}; + } + if (const auto *GenEF = llvm::dyn_cast(EF)) { + return {0, &GenEF->Facts}; + } + if (const auto *AddEF = llvm::dyn_cast(EF)) { + return {0, &AddEF->Facts}; + } + llvm_unreachable("All edge function types handled"); +} + +template +IDEFeatureTaintEdgeFact unionTaints(const EdgeFunction &FirstEF, + const EdgeFunction &OtherEF) { + + auto [FirstSmallFacts, FirstLargeFacts] = + extractFacts(FirstEF); + auto [OtherSmallFacts, OtherLargeFacts] = + extractFacts(OtherEF); + + if (FirstLargeFacts) { + IDEFeatureTaintEdgeFact Ret = *FirstLargeFacts; + if (OtherLargeFacts) { + Ret.unionWith(*OtherLargeFacts); + } else { + Ret.unionWith(OtherSmallFacts); + } + return Ret; + } + if (OtherLargeFacts) { + IDEFeatureTaintEdgeFact Ret = *OtherLargeFacts; + Ret.unionWith(FirstSmallFacts); + return Ret; + } + // Both Small + FirstSmallFacts |= OtherSmallFacts; + return FirstSmallFacts; +} + +EdgeFunction iiaDefaultJoinOrNull(const EdgeFunction &This, + const EdgeFunction &OtherFunction) { + if (llvm::isa>(OtherFunction) || + llvm::isa>(This)) { + return OtherFunction; + } + + // Due to our caching, we can do a reference-equals here + if (llvm::isa>(OtherFunction) || + OtherFunction.referenceEquals(This) || llvm::isa>(This)) { + return This; + } + if (llvm::isa>(OtherFunction)) { + return AllBottom{}; + } + return nullptr; +} + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +EdgeFunction +IDEFeatureTaintAnalysis::extend(const EdgeFunction &FirstEF, + const EdgeFunction &SecondEF) { + + if (auto Default = defaultComposeOrNull(FirstEF, SecondEF)) { + return Default; + } + + auto Val = SecondEF.computeTarget(FirstEF.computeTarget(0)); + + if (FirstEF.isConstant()) { + return genEF(std::move(Val), GenEFCache); + } + + return addEF(std::move(Val), AddEFCache); +} + +EdgeFunction +IDEFeatureTaintAnalysis::combine(const EdgeFunction &FirstEF, + const EdgeFunction &OtherEF) { + + /// XXX: Here, we underapproximate joins with EdgeIdentity + if (llvm::isa>(FirstEF)) { + return OtherEF; + } + if (llvm::isa>(OtherEF) && + !llvm::isa>(FirstEF)) { + return FirstEF; + } + + if (auto Default = iiaDefaultJoinOrNull(FirstEF, OtherEF)) { + return Default; + } + + auto ThisFacts = unionTaints(FirstEF, OtherEF); + + if (FirstEF.isConstant() && OtherEF.isConstant()) { + return genEF(std::move(ThisFacts), GenEFCache); + } + + return addEF(std::move(ThisFacts), AddEFCache); +} + +auto IDEFeatureTaintAnalysis::getNormalEdgeFunction(n_t Curr, d_t CurrNode, + n_t /* Succ */, + d_t SuccNode) + -> EdgeFunction { + + if (isZeroValue(SuccNode) || CurrNode == SuccNode) { + // We don't want to propagate any facts on zero + return EdgeIdentity{}; + } + + if (isZeroValue(CurrNode)) { + // Generate user edge-facts from zero + return genEF(TaintGen.getGeneratedTaintsAt(Curr), GenEFCache); + } + + // Otherwise stick to identity. + return EdgeIdentity{}; +} + +auto IDEFeatureTaintAnalysis::getCallEdgeFunction(n_t CallSite, d_t SrcNode, + f_t /*DestinationFunction*/, + d_t DestNode) + -> EdgeFunction { + if (isZeroValue(SrcNode) && !isZeroValue(DestNode)) { + // Generate user edge-facts from zero + return genEF(TaintGen.getGeneratedTaintsAt(CallSite), GenEFCache); + } + + return EdgeIdentity{}; +} + +auto IDEFeatureTaintAnalysis::getReturnEdgeFunction( + n_t /*CallSite*/, f_t /*CalleeFunction*/, n_t ExitStmt, d_t ExitNode, + n_t /*RetSite*/, d_t RetNode) -> EdgeFunction { + if (isZeroValue(ExitNode) && !isZeroValue(RetNode)) { + // Generate user edge-facts from zero + return genEF(TaintGen.getGeneratedTaintsAt(ExitStmt), GenEFCache); + } + + return EdgeIdentity{}; +} + +auto IDEFeatureTaintAnalysis::getCallToRetEdgeFunction( + n_t CallSite, d_t CallNode, n_t /*RetSite*/, d_t RetSiteNode, + llvm::ArrayRef /*Callees*/) -> EdgeFunction { + if (isZeroValue(CallNode) && !isZeroValue(RetSiteNode)) { + // Generate user edge-facts from zero + return genEF(TaintGen.getGeneratedTaintsAt(CallSite), GenEFCache); + } + + // Capture interactions of the call instruction and its arguments. + const auto *CS = llvm::dyn_cast(CallSite); + for (const auto &Arg : CS->args()) { + // + // o_i --> o_i + // + // Edge function: + // + // o_i + // | + // %i = call o_i | \ \x.x \cup { commit of('%i = call H') } + // v + // o_i + // + if (CallNode == Arg && CallNode == RetSiteNode) { + return addEF(TaintGen.getGeneratedTaintsAt(CallSite), AddEFCache); + } + } + + return EdgeIdentity{}; +} + +auto IDEFeatureTaintAnalysis::getSummaryEdgeFunction(n_t Curr, d_t CurrNode, + n_t /*Succ*/, d_t SuccNode) + -> EdgeFunction { + if (isZeroValue(CurrNode) && !isZeroValue(SuccNode)) { + // Generate user edge-facts from zero + return genEF(TaintGen.getGeneratedTaintsAt(Curr), GenEFCache); + } + + return EdgeIdentity{}; +} + +auto IDEFeatureTaintAnalysis::initialSeeds() -> InitialSeeds { + InitialSeeds Seeds; + + LLVMBasedCFG CFG; + forallStartingPoints(this->EntryPoints, IRDB, CFG, [this, &Seeds](n_t SP) { + // Set initial seeds at the required entry points and generate the global + // variables using generalized initial seeds + + // Generate zero value at the entry points + Seeds.addSeed(SP, this->getZeroValue(), 0); + // Generate formal parameters of entry points, e.g. main(). Formal + // parameters will otherwise cause trouble by overriding alloca + // instructions without being valid data-flow facts themselves. + + // Generate all global variables using generalized initial seeds + for (const auto &G : this->IRDB->getModule()->globals()) { + if (const auto *GV = llvm::dyn_cast(&G)) { + l_t InitialValues = TaintGen.getGeneratedTaintsAt(GV); + if (InitialValues.Taints.any()) { + Seeds.addSeed(SP, GV, std::move(InitialValues)); + } + } + } + }); + + return Seeds; +} + +bool IDEFeatureTaintAnalysis::isZeroValue(d_t FlowFact) const noexcept { + return LLVMZeroValue::isLLVMZeroValue(FlowFact); +} + +void IDEFeatureTaintAnalysis::emitTextReport( + GenericSolverResults SR, llvm::raw_ostream &OS) { + OS << "\n====================== IDE-Inst-Interaction-Analysis Report " + "======================\n"; + + for (const auto *F : IRDB->getAllFunctions()) { + auto FunName = F->getName(); + OS << "\nFunction: " << FunName << "\n----------" + << std::string(FunName.size(), '-') << '\n'; + + for (const auto &Inst : llvm::instructions(F)) { + auto Results = SR.resultsAt(&Inst, true); + + if (!Results.empty()) { + OS << "At IR statement: " << NToString(Inst) << '\n'; + for (const auto &Result : Results) { + if (!Result.second.isBottom()) { + OS << " Fact: " << DToString(Result.first) + << "\n Value: " << TaintGen.toString(Result.second) << '\n'; + } + } + OS << '\n'; + } + } + OS << '\n'; + } +} diff --git a/test/llvm_test_code/inst_interaction/CMakeLists.txt b/test/llvm_test_code/inst_interaction/CMakeLists.txt index ecd10f3a7c..6fdfa88e8b 100644 --- a/test/llvm_test_code/inst_interaction/CMakeLists.txt +++ b/test/llvm_test_code/inst_interaction/CMakeLists.txt @@ -22,11 +22,15 @@ set(Sources call_07.cpp global_01.cpp global_02.cpp + global_05.cpp heap_01.cpp KillTest_01.cpp KillTest_02.cpp return_01.cpp rvo_01.cpp + rvo_02.cpp + rvo_03.cpp + rvo_04.cpp struct_01.cpp struct_02.cpp ) diff --git a/test/llvm_test_code/inst_interaction/global_05.cpp b/test/llvm_test_code/inst_interaction/global_05.cpp new file mode 100644 index 0000000000..e71fb9069e --- /dev/null +++ b/test/llvm_test_code/inst_interaction/global_05.cpp @@ -0,0 +1,12 @@ +#include + +int g = 0; + +void init() { g = 1; } + +int foo() { return g; } + +int main() { + init(); + std::cout << foo(); +} diff --git a/test/llvm_test_code/inst_interaction/rvo_02.cpp b/test/llvm_test_code/inst_interaction/rvo_02.cpp new file mode 100644 index 0000000000..113e3bfe93 --- /dev/null +++ b/test/llvm_test_code/inst_interaction/rvo_02.cpp @@ -0,0 +1,13 @@ +#include + +int g = 0; +void functionWithoutInput() { g = 42; } +std::string createString() { return "My String"; } + +int main() { + std::string str; + functionWithoutInput(); + str = "1234"; + str = createString(); + return str.size(); +} diff --git a/test/llvm_test_code/inst_interaction/rvo_03.cpp b/test/llvm_test_code/inst_interaction/rvo_03.cpp new file mode 100644 index 0000000000..f959b899d1 --- /dev/null +++ b/test/llvm_test_code/inst_interaction/rvo_03.cpp @@ -0,0 +1,54 @@ + +#include +#include +#include + +class String { +public: + String() noexcept = default; + + String(const char *Data) noexcept : Length(strlen(Data)) { + auto *Dat = new char[Length]; + this->Data = Dat; + memcpy(Dat, Data, Length); + } + ~String() { delete[] Data; } + + String(String &&Other) noexcept : Data(Other.Data), Length(Other.Length) { + Other.Data = nullptr; + Other.Length = 0; + } + + void swap(String &Other) noexcept { + const auto *Dat = Data; + Data = Other.Data; + Other.Data = Dat; + + auto Len = Length; + Length = Other.Length; + Other.Length = Len; + } + + String &operator=(String &&Other) noexcept { + String(std::move(Other)).swap(*this); + return *this; + } + + [[nodiscard]] size_t size() const noexcept { return Length; } + +private: + const char *Data{}; + size_t Length{}; +}; + +int g = 0; +void functionWithoutInput() { g = 42; } +String createString() noexcept { return "My String"; } + +int main() { + String Str; + functionWithoutInput(); + Str = "1234"; + Str = createString(); + return Str.size(); +} diff --git a/test/llvm_test_code/inst_interaction/rvo_04.cpp b/test/llvm_test_code/inst_interaction/rvo_04.cpp new file mode 100644 index 0000000000..cebb85a9e1 --- /dev/null +++ b/test/llvm_test_code/inst_interaction/rvo_04.cpp @@ -0,0 +1,20 @@ + +#include +#include +#include + +struct Foo { + int X = 0; + + Foo() noexcept = default; + // NOLINTNEXTLINE + Foo(const Foo &Other) noexcept : X(Other.X) {} +}; + +Foo createFoo() { return {}; } + +int main() { + Foo F; + F = createFoo(); + return F.X; +} diff --git a/tools/phasar-cli/Controller/AnalysisController.cpp b/tools/phasar-cli/Controller/AnalysisController.cpp index 8ad63474e2..27fb9d2f9c 100644 --- a/tools/phasar-cli/Controller/AnalysisController.cpp +++ b/tools/phasar-cli/Controller/AnalysisController.cpp @@ -154,6 +154,9 @@ static void executeWholeProgram(AnalysisController &Data) { case DataFlowAnalysisType::IDEInstInteractionAnalysis: executeIDEIIA(Data); continue; + case DataFlowAnalysisType::IDEFeatureTaintAnalysis: + executeIDEFIIA(Data); + continue; case DataFlowAnalysisType::IntraMonoFullConstantPropagation: executeIntraMonoFullConstant(Data); continue; diff --git a/tools/phasar-cli/Controller/AnalysisControllerInternal.h b/tools/phasar-cli/Controller/AnalysisControllerInternal.h index 019cc122f2..8576c51438 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerInternal.h +++ b/tools/phasar-cli/Controller/AnalysisControllerInternal.h @@ -42,6 +42,7 @@ LLVM_LIBRARY_VISIBILITY void executeIDECSTDIOTS(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIDELinearConst(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIDESolverTest(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIDEIIA(AnalysisController &Data); +LLVM_LIBRARY_VISIBILITY void executeIDEFIIA(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIntraMonoFullConstant(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void diff --git a/tools/phasar-cli/Controller/AnalysisControllerXIDEFIIA.cpp b/tools/phasar-cli/Controller/AnalysisControllerXIDEFIIA.cpp new file mode 100644 index 0000000000..6bc04e31aa --- /dev/null +++ b/tools/phasar-cli/Controller/AnalysisControllerXIDEFIIA.cpp @@ -0,0 +1,44 @@ +/****************************************************************************** + * Copyright (c) 2022 Martin Mory. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Martin Mory and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h" + +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/Casting.h" + +#include "AnalysisControllerInternalIDE.h" + +using namespace psr; + +void controller::executeIDEFIIA(AnalysisController &Data) { + // use Phasar's instruction ids as testing labels + auto Generator = + [](std::variant + Current) -> std::set { + return std::visit( + [](const auto *InstOrGlob) -> std::set { + std::set Labels; + if (InstOrGlob->hasMetadata()) { + std::string Label = + llvm::cast( + InstOrGlob->getMetadata(PhasarConfig::MetaDataKind()) + ->getOperand(0)) + ->getString() + .str(); + Labels.insert(Label); + } + return Labels; + }, + Current); + }; + + executeIDEAnalysis(Data, Data.EntryPoints, + Generator); +} diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 25388cf790..405738b1f8 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -16,13 +16,14 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux") if ("${UBUNTU_MAJOR_VERSION}" GREATER 22) # TODO tests shouldn't be flaky - list(APPEND additional_args -E "\"(LLVMBasedCFGTest|LLVMBasedICFGGlobCtorDtorTest|IDEInstInteractionAnalysisTest|IFDSUninitializedVariablesTest|IDEGeneralizedLCATest|IDEExtendedTaintAnalysisTest)\"") + list(APPEND additional_args -E "\"(LLVMBasedCFGTest|LLVMBasedICFGGlobCtorDtorTest|IDEInstInteractionAnalysisTest|IFDSUninitializedVariablesTest|IDEGeneralizedLCATest|IDEExtendedTaintAnalysisTest|IDEFeatureTaintAnalysis)\"") # LLVMBasedCFGTest.HandlesCppStandardType # IDEInstInteractionAnalysisTest.HandleBasicTest_04 # IFDSUninitializedVariablesTest.UninitTest_05_SHOULD_NOT_LEAK # .UninitTest_06_SHOULD_NOT_LEAK # IDEGeneralizedLCATest.StringTestCpp # IDEExtendedTaintAnalysisTest.XTaint09 + # IDEFeatureTaintAnalysis.HandleBasicTest_04, HandleGlobalTest_05 endif() endif() diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt index 708ce3bcbc..7d5a903495 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/CMakeLists.txt @@ -8,6 +8,7 @@ set(IfdsIdeProblemSources IDEGeneralizedLCATest.cpp IDEExtendedTaintAnalysisTest.cpp IDETSAnalysisFileIOTest.cpp + IDEFeatureTaintAnalysisTest.cpp ) if(PHASAR_BUILD_OPENSSL_TS_UNITTESTS) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp new file mode 100644 index 0000000000..cf0d4ac181 --- /dev/null +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysisTest.cpp @@ -0,0 +1,555 @@ +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDEFeatureTaintAnalysis.h" + +#include "phasar/Config/Configuration.h" +#include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/LLVMSolverResults.h" +#include "phasar/PhasarLLVM/HelperAnalyses.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/BitVectorSet.h" +#include "phasar/Utils/Logger.h" + +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" + +#include "TestConfig.h" +#include "gtest/gtest.h" + +namespace { + +using namespace psr; + +static std::string printSet(const std::set &EdgeFact) { + std::string Ret; + llvm::raw_string_ostream ROS(Ret); + llvm::interleaveComma(EdgeFact, ROS << '<'); + ROS << '>'; + return Ret; +} + +/* ============== TEST FIXTURE ============== */ +class IDEFeatureTaintAnalysisTest : public ::testing::Test { +protected: + static constexpr auto PathToLlFiles = + PHASAR_BUILD_SUBFOLDER("inst_interaction/"); + + // Function - Line Nr - Variable - Values + using IIACompactResult_t = + std::tuple>; + + std::optional HA; + LLVMProjectIRDB *IRDB{}; + + void initializeIR(const llvm::Twine &LlvmFilePath, + const std::vector &EntryPoints = {"main"}) { + HA.emplace(PathToLlFiles + LlvmFilePath, EntryPoints, + HelperAnalysisConfig{}.withCGType(CallGraphAnalysisType::CHA)); + IRDB = &HA->getProjectIRDB(); + + for (const auto &Glob : IRDB->getModule()->globals()) { + BitVectorSet BV; + BV.insert(getMetaDataID(&Glob)); + } + + // Initialze IDs + for (const auto *Inst : IRDB->getAllInstructions()) { + BitVectorSet BV; + BV.insert(getMetaDataID(Inst)); + } + } + + void + doAnalysisAndCompareResults(const std::string &LlvmFilePath, + const std::vector &EntryPoints, + const std::set &GroundTruth, + bool PrintDump = false) { + initializeIR(LlvmFilePath, EntryPoints); + + // IDEInstInteractionAnalysisT IIAProblem(IRDB, &ICFG, + // &PT, + // EntryPoints); + + // use Phasar's instruction ids as testing labels + auto Generator = + [](std::variant + Current) -> std::set { + return std::visit( + [](const auto *InstOrGlob) -> std::set { + std::set Labels; + if (InstOrGlob->hasMetadata()) { + std::string Label = + llvm::cast( + InstOrGlob->getMetadata(PhasarConfig::MetaDataKind()) + ->getOperand(0)) + ->getString() + .str(); + Labels.insert(Label); + } + return Labels; + }, + Current); + }; + assert(HA); + auto IIAProblem = createAnalysisProblem( + *HA, EntryPoints, Generator); + + // if (PrintDump) { + // psr::Logger::initializeStderrLogger(SeverityLevel::DEBUG); + // } + + IDESolver IIASolver(IIAProblem, &HA->getICFG()); + IIASolver.solve(); + // if (PrintDump) { + // // IRDB->emitPreprocessedIR(llvm::outs()); + // IIASolver.dumpResults(); + // llvm::errs() + // << "\n======================================================\n"; + // printDump(HA->getProjectIRDB(), IIASolver.getSolverResults()); + // } + // do the comparison + for (const auto &[FunName, SrcLine, VarName, LatticeVal] : GroundTruth) { + const auto *Fun = IRDB->getFunctionDefinition(FunName); + const auto *IRLine = getNthInstruction(Fun, SrcLine); + auto ResultMap = IIASolver.resultsAt(IRLine); + assert(IRLine && "Could not retrieve IR line!"); + bool FactFound = false; + for (auto &[Fact, Value] : ResultMap) { + std::string FactStr; + llvm::raw_string_ostream RSO(FactStr); + RSO << *Fact; + llvm::StringRef FactRef(FactStr); + if (FactRef.ltrim().startswith("%" + VarName + " ") || + FactRef.ltrim().startswith("@" + VarName + " ")) { + PHASAR_LOG_LEVEL(DFADEBUG, "Checking variable: " << FactStr); + EXPECT_EQ(LatticeVal, Value.toSet()) + << "Value do not match for Variable '" << VarName + << "': Expected " << printSet(LatticeVal) + << "; got: " << LToString(Value.toBVSet()); + FactFound = true; + } + } + + EXPECT_TRUE(FactFound) << "Variable '" << VarName << "' missing at '" + << llvmIRToString(IRLine) << "'."; + } + + if (PrintDump || HasFailure()) { + IIASolver.dumpResults(llvm::errs()); + llvm::errs() + << "\n======================================================\n"; + printDump(HA->getProjectIRDB(), IIASolver.getSolverResults()); + } + } + + void TearDown() override {} + + // See vara::PhasarTaintAnalysis::taintsForInst + [[nodiscard]] inline std::set + taintsForInst(const llvm::Instruction *Inst, + SolverResults + SR) { + + if (const auto *Ret = llvm::dyn_cast(Inst)) { + if (Ret->getNumOperands() == 0) { + return {}; + } + } else if (llvm::isa(Inst)) { + return {}; + } + + std::set AggregatedTaints; + + if (Inst->getType()->isVoidTy()) { // For void types, we need to look what + // taints flow into the inst + + // auto Results = SR.resultsAt(Inst); + assert(Inst->getNumOperands() >= 1 && + "Found case without first operand."); + AggregatedTaints = + SR.resultAt(Inst, Inst->getOperand(0)).toSet(); + + } else { + auto Results = SR.resultsAtInLLVMSSA(Inst); + auto SearchPosTaints = Results.find(Inst); + if (SearchPosTaints != Results.end()) { + AggregatedTaints = SearchPosTaints->second.toSet(); + } + } + + // additionalStaticTaints + AggregatedTaints.insert(getMetaDataID(Inst)); + + return AggregatedTaints; + } + + void printDump(const LLVMProjectIRDB &IRDB, + SolverResults + SR) { + const llvm::Function *CurrFun = nullptr; + for (const auto *Inst : IRDB.getAllInstructions()) { + if (CurrFun != Inst->getFunction()) { + CurrFun = Inst->getFunction(); + llvm::errs() << "\n=================== '" << CurrFun->getName() + << "' ===================\n"; + } + llvm::errs() << " N: " << llvmIRToString(Inst) << '\n'; + llvm::errs() << " D: " << printSet(taintsForInst(Inst, SR)) << "\n\n"; + } + } + +}; // Test Fixture + +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_01) { + std::set GroundTruth; + GroundTruth.emplace("main", 9, "i", std::set{"4"}); + GroundTruth.emplace("main", 9, "j", + std::set{"4", "5", "6", "7"}); + GroundTruth.emplace("main", 9, "retval", std::set{"3"}); + doAnalysisAndCompareResults("basic_01_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_02) { + std::set GroundTruth; + GroundTruth.emplace("main", 24, "retval", std::set{"6"}); + GroundTruth.emplace("main", 24, "argc.addr", std::set{"7"}); + GroundTruth.emplace("main", 24, "argv.addr", std::set{"8"}); + GroundTruth.emplace("main", 24, "i", std::set{"16", "18"}); + GroundTruth.emplace("main", 24, "j", + std::set{"9", "10", "11", "12"}); + GroundTruth.emplace("main", 24, "k", + std::set{"21", "16", "18", "20"}); + doAnalysisAndCompareResults("basic_02_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_03) { + std::set GroundTruth; + GroundTruth.emplace("main", 20, "retval", std::set{"3"}); + GroundTruth.emplace("main", 20, "i", + std::set{"4", "10", "11", "12"}); + GroundTruth.emplace("main", 20, "x", + std::set{"5", "14", "15", "16"}); + doAnalysisAndCompareResults("basic_03_cpp.ll", {"main"}, GroundTruth, false); +} + +PHASAR_SKIP_TEST(TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_04) { + // If we use libcxx this won't work since internal implementation is different + LIBCPP_GTEST_SKIP; + + std::set GroundTruth; + GroundTruth.emplace("main", 23, "retval", std::set{"13"}); + GroundTruth.emplace("main", 23, "argc.addr", std::set{"14"}); + GroundTruth.emplace("main", 23, "argv.addr", std::set{"15"}); + GroundTruth.emplace("main", 23, "i", std::set{"16"}); + GroundTruth.emplace("main", 23, "j", + std::set{"16", "17", "19", "18"}); + GroundTruth.emplace( + "main", 23, "k", + std::set{"16", "17", "18", "19", "20", "24", "25"}); + doAnalysisAndCompareResults("basic_04_cpp.ll", {"main"}, GroundTruth, false); +}) + +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_05) { + std::set GroundTruth; + GroundTruth.emplace("main", 11, "i", std::set{"5", "7"}); + GroundTruth.emplace("main", 11, "retval", std::set{"2"}); + doAnalysisAndCompareResults("basic_05_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_06) { + std::set GroundTruth; + GroundTruth.emplace("main", 19, "retval", std::set{"5"}); + GroundTruth.emplace("main", 19, "i", std::set{"15", "6", "13"}); + GroundTruth.emplace("main", 19, "j", std::set{"15", "6", "13"}); + GroundTruth.emplace("main", 19, "k", std::set{"6"}); + GroundTruth.emplace("main", 19, "p", + std::set{"1", "2", "9", "11"}); + doAnalysisAndCompareResults("basic_06_cpp.ll", {"main"}, GroundTruth, true); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_07) { + std::set GroundTruth; + GroundTruth.emplace("main", 15, "retval", std::set{"5"}); + GroundTruth.emplace("main", 15, "argc.addr", std::set{"6"}); + GroundTruth.emplace("main", 15, "argv.addr", std::set{"7"}); + GroundTruth.emplace("main", 15, "i", std::set{"12"}); + GroundTruth.emplace("main", 15, "j", + std::set{"8", "9", "10", "11"}); + doAnalysisAndCompareResults("basic_07_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_08) { + std::set GroundTruth; + GroundTruth.emplace("main", 12, "retval", std::set{"2"}); + GroundTruth.emplace("main", 12, "i", std::set{"9"}); + doAnalysisAndCompareResults("basic_08_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_09) { + std::set GroundTruth; + GroundTruth.emplace("main", 10, "i", std::set{"4"}); + GroundTruth.emplace("main", 10, "j", std::set{"4", "6", "7"}); + GroundTruth.emplace("main", 10, "retval", std::set{"3"}); + doAnalysisAndCompareResults("basic_09_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_10) { + std::set GroundTruth; + GroundTruth.emplace("main", 6, "i", std::set{"3"}); + GroundTruth.emplace("main", 6, "retval", std::set{"2"}); + doAnalysisAndCompareResults("basic_10_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleBasicTest_11) { + std::set GroundTruth; + GroundTruth.emplace("main", 20, "FeatureSelector", + std::set{"5", "7", "8"}); + GroundTruth.emplace("main", 20, "retval", std::set{"11", "16"}); + doAnalysisAndCompareResults("basic_11_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_01) { + std::set GroundTruth; + GroundTruth.emplace("main", 14, "retval", std::set{"8"}); + GroundTruth.emplace("main", 14, "i", std::set{"9"}); + GroundTruth.emplace("main", 14, "j", + std::set{"12", "9", "10", "11"}); + GroundTruth.emplace( + "main", 14, "k", + std::set{"15", "1", "2", "13", "14", "12", "9", "10", "11"}); + doAnalysisAndCompareResults("call_01_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_02) { + std::set GroundTruth; + GroundTruth.emplace("main", 13, "retval", std::set{"12"}); + GroundTruth.emplace("main", 13, "i", std::set{"13"}); + GroundTruth.emplace("main", 13, "j", std::set{"14"}); + GroundTruth.emplace("main", 13, "k", + std::set{"4", "5", "15", "6", "3", "14", "2", + "13", "16", "17", "18"}); + doAnalysisAndCompareResults("call_02_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_03) { + std::set GroundTruth; + GroundTruth.emplace("main", 10, "retval", std::set{"20"}); + GroundTruth.emplace("main", 10, "i", std::set{"21"}); + GroundTruth.emplace("main", 10, "j", + std::set{"22", "23", "15", "6", "21", "2", + "13", "8", "9", "11", "12", "10", + "24"}); + doAnalysisAndCompareResults("call_03_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_04) { + std::set GroundTruth; + GroundTruth.emplace("main", 20, "retval", std::set{"33"}); + GroundTruth.emplace("main", 20, "i", std::set{"34"}); + GroundTruth.emplace("main", 20, "j", + std::set{"15", "6", "2", "13", "8", "9", + "11", "12", "10", "35", "36", "34", + "37"}); + GroundTruth.emplace("main", 20, "k", + std::set{"41", "19", "15", "6", "44", "2", + "13", "8", "45", "18", "9", "11", + "12", "10", "46", "24", "25", "35", + "36", "27", "23", "26", "38", "34", + "37", "42", "43", "39", "40"}); + doAnalysisAndCompareResults("call_04_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_05) { + std::set GroundTruth; + GroundTruth.emplace("main", 10, "retval", std::set{"8"}); + GroundTruth.emplace("main", 10, "i", std::set{"3", "11", "9"}); + GroundTruth.emplace("main", 10, "j", std::set{"3", "10", "12"}); + doAnalysisAndCompareResults("call_05_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_06) { + // NOTE: Here we are suffering from IntraProceduralAliasesOnly + std::set GroundTruth; + GroundTruth.emplace("main", 24, "retval", std::set{"11"}); + GroundTruth.emplace( + "main", 24, "i", + std::set{"3", "1", "2", "16", "17", "18", "12"}); + GroundTruth.emplace( + "main", 24, "j", + std::set{"19", "20", "21", "3", "1", "2", "13"}); + GroundTruth.emplace( + "main", 24, "k", + std::set{"22", "23", "3", "14", "1", "2", "24"}); + GroundTruth.emplace( + "main", 24, "l", + std::set{"15", "3", "1", "2", "25", "26", "27"}); + doAnalysisAndCompareResults("call_06_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleCallTest_07) { + std::set GroundTruth; + GroundTruth.emplace("main", 6, "retval", std::set{"7"}); + GroundTruth.emplace("main", 6, "VarIR", std::set{"6", "3", "8"}); + doAnalysisAndCompareResults("call_07_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleGlobalTest_01) { + std::set GroundTruth; + GroundTruth.emplace("main", 9, "retval", std::set{"3"}); + GroundTruth.emplace("main", 9, "i", std::set{"7"}); + GroundTruth.emplace("main", 9, "j", std::set{"0", "5", "6"}); + doAnalysisAndCompareResults("global_01_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleGlobalTest_02) { + std::set GroundTruth; + GroundTruth.emplace("_Z5initBv", 2, "a", std::set{"0"}); + GroundTruth.emplace("_Z5initBv", 2, "b", std::set{"2"}); + GroundTruth.emplace("main", 12, "a", std::set{"0"}); + GroundTruth.emplace("main", 12, "b", std::set{"2"}); + GroundTruth.emplace("main", 12, "retval", std::set{"6"}); + GroundTruth.emplace("main", 12, "c", std::set{"1", "8", "7"}); + doAnalysisAndCompareResults("global_02_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleGlobalTest_03) { + std::set GroundTruth; + GroundTruth.emplace("main", 1, "GlobalFeature", std::set{"0"}); + GroundTruth.emplace("main", 2, "GlobalFeature", std::set{"0"}); + GroundTruth.emplace("main", 17, "GlobalFeature", std::set{"0"}); + doAnalysisAndCompareResults("global_03_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleGlobalTest_04) { + std::set GroundTruth; + GroundTruth.emplace("main", 1, "GlobalFeature", std::set{"0"}); + GroundTruth.emplace("main", 2, "GlobalFeature", std::set{"0"}); + GroundTruth.emplace("main", 17, "GlobalFeature", std::set{"0"}); + GroundTruth.emplace("_Z7doStuffi", 1, "GlobalFeature", + std::set{"0"}); + GroundTruth.emplace("_Z7doStuffi", 2, "GlobalFeature", + std::set{"0"}); + doAnalysisAndCompareResults("global_04_cpp.ll", {"main", "_Z7doStuffi"}, + GroundTruth, false); +} +TEST_F(IDEFeatureTaintAnalysisTest, HandleGlobalTest_05) { + std::set GroundTruth; + + // NOTE: Facts at init() should be empty, except for its own ID; + // g should be strongly updated + + GroundTruth.emplace("main", 1, "g", std::set{"2"}); + GroundTruth.emplace("main", 2, "g", std::set{"8"}); + GroundTruth.emplace("main", 4, "call", + std::set{"10", "13", "14", "8"}); + GroundTruth.emplace("main", 4, "g", std::set{"8"}); + + doAnalysisAndCompareResults("global_05_cpp.ll", {"main"}, GroundTruth, true); +} + +TEST_F(IDEFeatureTaintAnalysisTest, KillTest_01) { + std::set GroundTruth; + GroundTruth.emplace("main", 12, "retval", std::set{"4"}); + GroundTruth.emplace("main", 12, "i", std::set{"5"}); + GroundTruth.emplace("main", 12, "j", std::set{"10"}); + GroundTruth.emplace("main", 12, "k", std::set{"9", "8", "5"}); + doAnalysisAndCompareResults("KillTest_01_cpp.ll", {"main"}, GroundTruth, + false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, KillTest_02) { + std::set GroundTruth; + GroundTruth.emplace("main", 12, "retval", std::set{"6"}); + GroundTruth.emplace("main", 12, "A", std::set{"0"}); + GroundTruth.emplace("main", 12, "B", std::set{"2"}); + GroundTruth.emplace("main", 12, "C", std::set{"1", "7", "8"}); + doAnalysisAndCompareResults("KillTest_02_cpp.ll", {"main"}, GroundTruth, + false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleReturnTest_01) { + std::set GroundTruth; + GroundTruth.emplace("main", 6, "retval", std::set{"3"}); + GroundTruth.emplace("main", 6, "localVar", std::set{"4"}); + GroundTruth.emplace("main", 6, "call", std::set{"0", "5"}); + GroundTruth.emplace("main", 8, "localVar", + std::set{"0", "5", "6"}); + GroundTruth.emplace("main", 8, "call", std::set{"0", "5"}); + doAnalysisAndCompareResults("return_01_cpp.ll", {"main"}, GroundTruth, false); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleHeapTest_01) { + std::set GroundTruth; + GroundTruth.emplace("main", 17, "retval", std::set{"3"}); + GroundTruth.emplace("main", 17, "i", std::set{"5", "6"}); + GroundTruth.emplace("main", 17, "j", + std::set{"5", "6", "7", "8", "9"}); + doAnalysisAndCompareResults("heap_01_cpp.ll", {"main"}, GroundTruth, false); +} + +PHASAR_SKIP_TEST(TEST_F(IDEFeatureTaintAnalysisTest, HandleRVOTest_01) { + GTEST_SKIP() << "This test heavily depends on the used stdlib version. TODO: " + "add a better one"; + + std::set GroundTruth; + GroundTruth.emplace("main", 16, "retval", std::set{"75", "76"}); + GroundTruth.emplace("main", 16, "str", + std::set{"70", "65", "72", "74", "77"}); + GroundTruth.emplace("main", 16, "ref.tmp", + std::set{"66", "9", "72", "73", "71"}); + doAnalysisAndCompareResults("rvo_01_cpp.ll", {"main"}, GroundTruth, false); +}) + +PHASAR_SKIP_TEST(TEST_F(IDEFeatureTaintAnalysisTest, HandleRVOTest_02) { + GTEST_SKIP() << "This test heavily depends on the used stdlib version. TODO: " + "add a better one"; + + std::set GroundTruth; + GroundTruth.emplace("main", 18, "retval", std::set{"75", "76"}); + GroundTruth.emplace("main", 18, "str", + std::set{"70", "65", "72", "74", "77"}); + GroundTruth.emplace("main", 18, "ref.tmp", + std::set{"66", "9", "72", "73", "71"}); + doAnalysisAndCompareResults("rvo_02_cpp.ll", {"main"}, GroundTruth, true); +}) + +TEST_F(IDEFeatureTaintAnalysisTest, HandleRVOTest_03) { + std::set GroundTruth; + + GroundTruth.emplace( + "main", 19, "Str", + std::set{"39", "43", "46", "49", "51", "54", "63"}); + GroundTruth.emplace("main", 19, "ref.tmp", + std::set{"13", "19", "2", "20", "23", "24", + "25", "27", "29", "32", "40", "45", + "46", "47"}); + GroundTruth.emplace("main", 19, "ref.tmp1", + std::set{"1", "13", "19", "20", "23", "24", + "25", "27", "29", "32", "48", "49", + "50"}); + doAnalysisAndCompareResults("rvo_03_cpp.ll", {"main"}, GroundTruth, true); +} + +TEST_F(IDEFeatureTaintAnalysisTest, HandleRVOTest_04) { + std::set GroundTruth; + GroundTruth.emplace("main", 10, "retval", std::set{"14"}); + GroundTruth.emplace( + "main", 10, "F", + std::set{"12", "15", "16", "17", "2", "3", "9"}); + GroundTruth.emplace("main", 10, "ref.tmp", + std::set{"16", "17", "2", "3", "9"}); + doAnalysisAndCompareResults("rvo_04_cpp.ll", {"main"}, GroundTruth, true); +} + +} // namespace + +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); +}