diff --git a/README.md b/README.md index 03aed26..7877c6b 100644 --- a/README.md +++ b/README.md @@ -319,6 +319,9 @@ If you wish to perform a path search such as the above, but without reporting pr use the `Qtil::GraphPathSearch` module instead, which provides an efficient search algorithm without producing a `@kind path-problem` query. +For a custom path problem with stateful flow tracking (see `GraphPathStateSearch` for more info), +use `CustomPathStateProblem`. + ### Inheritance **Instance**: A module to make `instanceof` inheritance easier in CodeQL, by writing @@ -433,6 +436,37 @@ This module takes a set of starting points, ending points, and edges in a graph, For displaying the discovered paths to users, see the `CustomPathProblem` module above. +**GraphPathStateSearch**: An expansion of the above module that allows for tracking state from +start to end of a path, with potential transformation of that state on each edge. + +For example, this can be used to set a maximum search depth or to find cycles in a graph (such as +recursive functions). + +``` +module RecursiveFunctionSearch implements Qtil::GraphPathStateSearchSig { + // Our state is, conceptually, just the function we started the search from. However, we must + // distinguish end nodes based on whether at least one step was taken to reach them. If we don't, + // then all functions will have a flow path (of zero length) to themselves. + newtype State = TNoStepTaken(Function f) or TStepsTaken(Function f); + + // Start nodes haven't yet taken a step, so the state is NoStepTaken + predicate start(Function f, State state) { state = TNoStepTaken(f) } + + // End nodes are functions that reach themselves after at least one step. + predicate end(Function f, State state) { state = TStepsTaken(f) } + + predicate edge(Function f0, State s0, Function f1, State s1) { + // Connect each function to the functions that it calls. 
+ f0.calls(f1) and + exists(Function initial | + // Forward the function along the edge, noting that at least one step has been taken. + (s0 = TNoStepTaken(initial) or s0 = TStepsTaken(initial)) and + s1 = TStepsTaken(initial) + ) + } +} +``` + ### Testing with Qnit While codeql's `test run` subcommand is a great way to test queries, it can be better in some cases diff --git a/src/qtil/graph/GraphPathSearch.qll b/src/qtil/graph/GraphPathSearch.qll index 0fb3de1..5cb3f6d 100644 --- a/src/qtil/graph/GraphPathSearch.qll +++ b/src/qtil/graph/GraphPathSearch.qll @@ -26,6 +26,8 @@ private import qtil.parameterization.Finalize * predicate end(Node n1) { ... } * } * ``` + * + * To track state as well as flow, use `GraphPathStateSearchSig` instead. */ signature module GraphPathSearchSig { /** @@ -106,6 +108,8 @@ signature module GraphPathSearchSig { * - `ReverseNode`: All forward nodes that reach end nodes. * * These classes may be useful in addition to the `hasPath` predicate. + * + * To track state as well as flow, use `GraphPathStateSearch` instead. */ module GraphPathSearch Config> { /** diff --git a/src/qtil/graph/GraphPathStateSearch.qll b/src/qtil/graph/GraphPathStateSearch.qll new file mode 100644 index 0000000..c0c8cf7 --- /dev/null +++ b/src/qtil/graph/GraphPathStateSearch.qll @@ -0,0 +1,284 @@ +/** + * Like `GraphPathSearch`, this file defines a module for efficiently finding paths in a directional + * graph using a performant pattern called forward-reverse pruning. + * + * Additionally, this module is designed to track state through the paths it is looking for. For + * instance, we could use this graph to find recursive functions, which requires knowing how an end + * node was reached from a start node (the state). + * + * Like `GraphPathSearch`, this module uses forward-reverse pruning, which is a pattern that is + * useful for efficiently finding connections between nodes in a directional graph. 
In a first pass, + * it finds nodes reachable from the starting point. In the second pass, it finds the subset of + * those nodes that can reach an end point. Together, these create a path from start + * points to end points. + * + * As with the other performance patterns in qtil, this module may be useful as is, or it may not + * fit your needs exactly. CodeQL evaluation and performance is very complex. In that case, consider + * this pattern as an example to create your own solution that fits your needs. + */ + +private import qtil.parameterization.SignatureTypes +private import qtil.parameterization.Finalize + +/** + * Implement this signature to define a graph, and a search for paths within that graph tracking + * some state, using the `GraphPathStateSearch` module. + * + * ```ql + * module MyConfig implements GraphPathStateSearchSig { + * class State extends ... { ... } + * predicate start(Node n1, State s1) { ... } + * predicate edge(Node n1, State s1, Node n2, State s2) { ... } + * predicate end(Node n1, State s1) { ... } + * } + * ``` + * + * To search without tracking state, use `GraphPathSearchSig` instead. + */ +signature module GraphPathStateSearchSig { + /** + * The state to be tracked through the paths found by this module. + * + * For example, if searching for recursive functions, this class might be defined as: + * + * ```ql + * class State = Function; + * ``` + * + * The `edge` predicate defined in this signature module decides how to forward this state, so + * the state may change as the path is traversed. + */ + bindingset[this] + class State; + + /** + * The nodes that begin the search of the graph, and the starting state for those nodes. + * + * For instance, if searching for recursive functions, this predicate might hold for a Function + * and its state may be the Function itself. + * + * Ultimately, only paths from a start node to an end node will be found by this module. + * + * In most cases, this will ideally be a smaller set of nodes than the end nodes. 
However, if the + * graph branches in one direction more than the other, a larger set which branches less may be + * preferable. + * + * The design of this predicate has a great effect in how well this performance pattern will + * ultimately perform. + */ + predicate start(Node n1, State s1); + + /** + * A directional edge from `n1` to `n2`, and the state that is forwarded from `n1` to `n2`. + * + * This module will search for paths from `start` to `end` by following the direction of + * these edges. + * + * As an example state transformation, a maximum search depth could be tracked at each edge and + * the new state would be the old state with the depth incremented by one. Alternatively, if + * searching for recursive functions, the state could be the starting function, and this edge + * relation would forward that function unchanged. + * + * The design of this predicate has a great effect in how well this performance pattern will + * ultimately perform. + */ + bindingset[s1] + bindingset[s2] + predicate edge(Node n1, State s1, Node n2, State s2); + + /** + * The end nodes of the search, if reached with the given state. + * + * For instance, if searching for recursive functions, this predicate would likely hold when a + * function node is reached with the state being the same function declaration (indicating flow from + * the start function to itself). + * + * Ultimately, only paths from a start node to an end node will be found by this module. + * + * The design of this predicate has a great effect in how well this performance pattern will + * ultimately perform. + */ + bindingset[s1] + predicate end(Node n1, State s1); +} + +/** + * A module that implements an efficient search for a path that satisfies specified stateful + * constraints within a custom directional graph from a set of start nodes to a set of end nodes. 
+ * + * For example, this module can be used to detect loops in the graph (perhaps to find recursive + * functions) by setting the "state" to be the start node, forwarding that state unchanged on each + * edge, and considering a node to be an end node if it is reached with itself as the state. + * Alternatively, the state could be used to track a maximum search depth, with a start state of + * zero that is incremented at each edge, and where the edge relation does not hold beyond a certain + * depth. + * + * To show discovered paths to users, see the module `CustomPathStateProblem` which uses this module + * as its underlying search implementation. + * + * This module uses a pattern called "forward reverse pruning" for efficiency. This pattern is + * useful for reducing the search space when looking for paths in a directional graph. In a first + * pass, it finds nodes reachable from the starting point. In the second pass, it finds the subset + * of those nodes that can reach an end point. Together, these create a path from start + * points to end points. + * + * To use this module, provide an implementation of the `GraphPathStateSearchSig` signature as follows: + * + * ```ql + * module Config implements GraphPathStateSearchSig { + * class State extends Something { ... } + * predicate start(Person p, State s) { p.checkSomething() and s = p.getSomeStartValue() } + * predicate edge(Person p1, State s1, Person p2, State s2) { p2 = p1.getAParent() and s2 = s1.next() } + * predicate end(Person p, State s) { p.checkSomethingElse() and s.isValidEndState() } + * } + * ``` + * + * The design of these predicates has a great effect in how well this performance pattern will + * ultimately perform. + * + * The resulting predicate `hasPath` should be a much more efficient search of connected start nodes + * to end nodes than a naive search (which in CodeQL could easily be evaluated as either a full + * graph search, or a search over the cross product of all nodes). 
+ * + * ```ql + * from Person p1, State s1, Person p2, State s2 + * // Fast graph path detection thanks to forward-reverse pruning. + * where GraphPathStateSearch::hasPath(p1, s1, p2, s2) + * select p1, s1, p2, s2 + * ``` + * + * The resulting module also exposes two classes: + * - `ForwardNode`: All nodes reachable from the start nodes, with member predicate `getState()`. + * - `ReverseNode`: All forward nodes that reach end nodes, with member predicate `getState()`. + * + * These classes may be useful in addition to the `hasPath` predicate. + * + * To search without tracking state, use `GraphPathSearch` instead. + */ +module GraphPathStateSearch Config> { + /** + * The set of all nodes reachable from the start nodes (inclusive). + * + * Includes the member predicate `getState()` which returns the state associated with this node at + * this point in the search. + */ + class ForwardNode extends Final::Type { + Config::State state; + + ForwardNode() { forwardNode(this, state) } + + /** + * Get the state associated with this forward node at this point in the search. + */ + Config::State getState() { result = state } + + string toString() { result = "ForwardNode" } + } + + /** + * The performant predicate for looking forward one step at a time in the graph. + * + * In `GraphPathSearch`, this is fast because it is essentially a unary predicate. The same is + * true here when the correct joins occur, such that (n, s) effectively act as a single value. + * + * For this reason, we use `pragma[only_bind_into]` to ensure the correct join order. + */ + private predicate forwardNode(Node n, Config::State s) { + Config::start(pragma[only_bind_into](n), pragma[only_bind_into](s)) + or + exists(Node n0, Config::State s0 | + forwardNode(pragma[only_bind_into](n0), pragma[only_bind_into](s0)) and + Config::edge(n0, s0, n, s) + ) + } + + /** + * The set of all forward nodes that reach end nodes (inclusive). 
+ * + * Includes the member predicate `getState()` which returns the state associated with this node at + * this point in the search. + * + * These nodes are the nodes that exist along the path from start nodes to end nodes. + * + * Note: this is fast to compute because it is essentially a unary predicate. + */ + class ReverseNode extends ForwardNode { + ReverseNode() { + // 'state' field and getState() predicate are inherited from ForwardNode + reverseNode(this, state) + } + + override string toString() { result = "ReverseNode" } + } + + /** + * The performant predicate for looking backward one step at a time from the end nodes. + * + * Both the base case and the recursive step are intersected with `forwardNode`. Without that + * restriction the reverse pass would walk backward over every `(node, state)` pair that can reach + * an end node, whether or not any start node reaches that pair. That defeats the pruning, and the + * recursion may never reach a fixpoint when `Config::edge` can compute unboundedly many + * predecessor states (for example an integer depth in a graph with cycles). + */ + private predicate reverseNode(Node n, Config::State s) { + forwardNode(pragma[only_bind_into](n), pragma[only_bind_into](s)) and + Config::end(n, s) + or + exists(Node n0, Config::State s0 | + reverseNode(pragma[only_bind_into](n0), pragma[only_bind_into](s0)) and + Config::edge(n, s, n0, s0) and + // Prune: keep only predecessors that the forward pass actually reached. + forwardNode(pragma[only_bind_out](n), pragma[only_bind_out](s)) + ) + } + + /** + * A start node, end node pair that are connected in the graph. + */ + predicate hasConnection(ReverseNode n1, ReverseNode n2) { hasConnection(n1, _, n2, _) } + + /** + * A start node, end node pair that are connected in the graph, and the states associated with + * those nodes. + */ + predicate hasConnection(ReverseNode n1, Config::State s1, ReverseNode n2, Config::State s2) { + Config::start(n1, s1) and + Config::end(n2, s2) and + ( + hasPath(n1, s1, n2, s2) + or + n1 = n2 and s1 = s2 + ) + } + + /** + * All relevant edges in the graph which participate in a connection from a start to an end node. + */ + predicate pathEdge(ReverseNode n1, ReverseNode n2) { pathEdge(n1, _, n2, _) } + + /** + * All relevant edges in the graph, plus state, which participate in a connection from a start to + * an end node. + */ + predicate pathEdge(ReverseNode n1, Config::State s1, ReverseNode n2, Config::State s2) { + Config::edge(n1, s1, n2, s2) and + reverseNode(pragma[only_bind_into](n2), pragma[only_bind_into](s2)) + } + + /** + * A performant path search within a custom directed graph from a set of start nodes to a set of + * end nodes. 
+ * + * This predicate is the main entry point for the forward-reverse pruning pattern. The design of + * the config predicates has a great effect in how well this performance pattern will ultimately + * perform. + * + * Example: + * ```ql + * from Person p1, State s1, Person p2, State s2 + * where GraphPathStateSearch::hasPath(p1, s1, p2, s2) + * select p1, s1, p2, s2 + * ``` + * + * Note: this is fast to compute because it limits the search space to nodes found by the fast unary + * searches done to find `ForwardNode` and `ReverseNode`. + */ + predicate hasPath(ReverseNode n1, Config::State s1, ReverseNode n2, Config::State s2) { + Config::start(n1, s1) and + Config::edge(n1, s1, n2, s2) + or + exists(ReverseNode nMid, Config::State sMid | + hasPath(n1, s1, nMid, sMid) and + Config::edge(pragma[only_bind_out](nMid), pragma[only_bind_out](sMid), n2, s2) + ) + } +} diff --git a/src/qtil/locations/CustomPathStateProblem.qll b/src/qtil/locations/CustomPathStateProblem.qll new file mode 100644 index 0000000..32bf304 --- /dev/null +++ b/src/qtil/locations/CustomPathStateProblem.qll @@ -0,0 +1,191 @@ +/** + * A module for creating custom path problem results in CodeQL from a stateful graph search. + */ + +import codeql.util.Location +import qtil.locations.Locatable + +/** + * A module for making a custom stateful path problem library for a given language in CodeQL. + */ +module PathStateProblem LocConfig> { + /** + * To create a custom stateful path problem, simply define the `Node` you want to search (which + * must be `Locatable`) and the `State` class for your path search state. Then, implement the + * `edge` relation, and `start` and `end` predicates to indicate the types of things that should + * be considered problems when connected in the graph. + * + * Optionally, you can also implement the `edgeInfo` and `nodeLabel` predicates to provide + * additional information about the edges and nodes in the graph. 
+ * + * Lastly, import `CustomPathStateProblem` to get the `problem` predicate, which holds for + * pairs of connected locations that will be traceable in the path problem results. + * + * See the `CallGraphPathStateProblemConfig` module for an example of how to use this module. + */ + signature module CustomPathStateProblemConfigSig { + /** + * A class that connects nodes in the graph to search locations. + * + * This class should be as small as possible, to avoid unnecessary search space. + */ + class Node extends LocConfig::Locatable; + + /** + * A class that represents the state of the path search. + * + * This is initialized in `start()` and checked in `end()`. It also may be forwarded and/or + * transformed in the `edge()` predicate. + */ + bindingset[this] + class State; + + /** + * The directional edges of the graph, from `a` to `b`, and how the state progresses from `s1` + * to `s2` at this edge. + * + * The design of this predicate will have a large impact on the performance of the search. + * However, the underlying search algorithm is efficient, so this should be fast in many cases + * even if this is a very large relation. + */ + bindingset[s1] + bindingset[s2] + predicate edge(Node a, State s1, Node b, State s2); + + /** + * Optional predicate to set additional information on the edges of the graph. + * + * By setting `key` to "provenance", the `val` string will be displayed in the path problem + * results, with one line per word in `val`. + */ + bindingset[a, b] + default predicate edgeInfo(Node a, Node b, string key, string val) { key = "" and val = "" } + + /** + * Optional predicate to set a label on the nodes of the graph. + * + * This does not appear to be used by vscode when displaying path problem results, but it is + * still part of the path problem API. + */ + bindingset[n] + default predicate nodeLabel(Node n, string value) { value = n.toString() } + + /** + * Where the graph search should start with a given initial state. 
+ * + * If this node is connected to a node `x` that holds for `end(x)`, then `problem(n, x)` will hold + * and edges between them will be added to the path problem results. + */ + predicate start(Node n, State s); + + /** + * Where the graph search should end (an end node and an end state). + * + * If this node is connected to a node `x` that holds for `start(x)`, then `problem(x, n)` will hold + * and edges between them will be added to the path problem results. + */ + bindingset[s] + predicate end(Node n, State s); + } + + /** + * A module for creating custom path problem results in CodeQL, using an efficient forward-reverse + * search pattern under the hood with state tracked along the edges. + * + * Implement `CustomPathStateProblemConfigSig` to define the nodes and edges of your graph, as well as + * start and end predicates to indicate the types of things that should be considered problems + * when connected in the graph. + * + * Then import this module, and select nodes for which `problem(a, b)` holds, and they will be + * traceable in the path problem results. 
+ * + * Example usage: + * ```ql + * module MacroPathProblemConfig implements CustomPathStateProblemConfigSig { + * class Node extends Locatable { + * Node() { this instanceof Macro or this instanceof MacroInvocation } + * } + * + * class State = int; // Set a max search depth + * + * predicate start(Node n, State depth) { + * // Start at root macro invocations + * n instanceof MacroInvocation and not exists(n.(MacroInvocation).getParentInvocation()) and + * // Set the initial state to a depth of 0 + * depth = 0 + * } + * + * // Find calls to macros we don't like, at any depth + * predicate end(Node n, State depth) { n instanceof Macro and isBad(n) and exists(depth) } + * + * predicate edge(Node a, State s1, Node b, State s2) { + * // Limit the search depth to 10 + * s1 < 10 and + * // Increment the state which represents the search depth + * s2 = s1 + 1 and + * ( + * // The root macro invocation is connected to its definition + * b = a.(MacroInvocation).getMacro() + * or + * exists(MacroInvocation inner, MacroInvocation next | + * // Connect inner macros to the macros that invoke them + * inner.getParentInvocation() = next and + * a = inner.getMacro() and b = next.getMacro() + * ) + * ) + * } + * } + * + * // Import query predicates that make path-problem work correctly + * import CustomPathStateProblem + * + * from MacroInvocation start, Macro end + * where problem(start, end) // find macro invocations that are connected to bad macros + * select start, start, end, "Macro invocation eventually calls a macro we don't like: $@", end, end.getName() + * ``` + * + * There is also a predicate `problem(a, s1, b, s2)` for reporting problems with their stateful + * search results. 
+ */ + module CustomPathStateProblem { + private import qtil.graph.GraphPathStateSearch as Search + + private module ForwardReverseConfig implements Search::GraphPathStateSearchSig { + import Config + } + + private import Search::GraphPathStateSearch as SearchResults + + /** The magical `edges` query predicate that powers `@kind path-problem` along with `nodes`. */ + query predicate edges(LocConfig::Locatable a, LocConfig::Locatable b, string key, string val) { + SearchResults::pathEdge(a, b) and + Config::edgeInfo(a, b, key, val) + } + + /** The magical `nodes` query predicate that powers `@kind path-problem` along with `edges`. */ + query predicate nodes(Config::Node n, string key, string value) { + n instanceof SearchResults::ReverseNode and + // It seems like "semmle.label" is the only valid key. + key = "semmle.label" and + Config::nodeLabel(n, value) + } + + /** + * A predicate that holds for locations that are connected in the graph. + * + * These pairs should all be problems reported by the query, otherwise the search space is larger + * than necessary. + */ + predicate problem(Config::Node a, Config::Node b) { SearchResults::hasConnection(a, b) } + + /** + * A predicate that holds for locations that are connected in the graph. + * + * These pairs should all be problems reported by the query, otherwise the search space is larger + * than necessary. 
+ */ + predicate problem(Config::Node a, Config::State s1, Config::Node b, Config::State s2) { + SearchResults::hasConnection(a, s1, b, s2) + } + } +} diff --git a/test/qtil/graph/GraphPathSearchTest.ql b/test/qtil/graph/GraphPathSearchTest.ql index 10057fa..90a4077 100644 --- a/test/qtil/graph/GraphPathSearchTest.ql +++ b/test/qtil/graph/GraphPathSearchTest.ql @@ -2,8 +2,6 @@ import qtil.testing.Qnit import qtil.graph.GraphPathSearch import Family -signature class FiniteType; - module BartToGrandpaConfig implements GraphPathSearchSig { predicate start(Person p) { p.getName() = "Bart" } diff --git a/test/qtil/graph/GraphPathStateSearchTest.expected b/test/qtil/graph/GraphPathStateSearchTest.expected new file mode 100644 index 0000000..1ecb061 --- /dev/null +++ b/test/qtil/graph/GraphPathStateSearchTest.expected @@ -0,0 +1 @@ +| All 14 tests passed. | diff --git a/test/qtil/graph/GraphPathStateSearchTest.ql b/test/qtil/graph/GraphPathStateSearchTest.ql new file mode 100644 index 0000000..4273e89 --- /dev/null +++ b/test/qtil/graph/GraphPathStateSearchTest.ql @@ -0,0 +1,277 @@ +import qtil.testing.Qnit +import qtil.graph.GraphPathStateSearch +import Family + +bindingset[relation] +bindingset[result] +string parentString(string relation) { + if relation = "child" + then result = "parent" + else + if relation = "parent" + then result = "grandparent" + else result = "great " + relation +} + +bindingset[relation] +bindingset[result] +string childString(string relation) { + if relation = "parent" + then result = "child" + else + if relation = "child" + then result = "grandchild" + else result = "great " + relation +} + +module BartToGrandpaConfig implements GraphPathStateSearchSig { + class State = string; + + predicate start(Person p, string state) { p.getName() = "Bart" and state = "child" } + + predicate end(Person p, string state) { p.getName() = "Grandpa" and state = "grandparent" } + + bindingset[s1] + bindingset[s2] + predicate edge(Person p1, string s1, Person p2, 
string s2) { + p2 = p1.getAParent() and + s2 = parentString(s1) + } +} + +module GrandpaToBartConfig implements GraphPathStateSearchSig { + class State = string; + + predicate start(Person p, string state) { p.getName() = "Grandpa" and state = "parent" } + + predicate end(Person p, string state) { p.getName() = "Bart" and state = "grandchild" } + + bindingset[s1] + bindingset[s2] + predicate edge(Person p1, State s1, Person p2, State s2) { + p2 = p1.getAChild() and + s2 = childString(s1) + } +} + +class TestBartForwardNodesContain extends Test, Case { + override predicate run(Qnit test) { + if + forall(Person p | + p.getName() = ["Bart", "Homer", "Marge", "Clancy", "Jacquelin", "Mona", "Grandpa"] + | + p instanceof GraphPathStateSearch::ForwardNode + ) + then test.pass("All forward nodes from Bart exist") + else test.fail("Some forward nodes from Bart are missing") + } +} + +class TestBartForwardNodesState extends Test, Case { + override predicate run(Qnit test) { + if + exists(GraphPathStateSearch::ForwardNode fwd | + fwd.getName() = "Bart" and + fwd.getState() = "child" + ) and + exists(GraphPathStateSearch::ForwardNode fwd | + fwd.getName() = "Marge" and + fwd.getState() = "parent" + ) and + exists(GraphPathStateSearch::ForwardNode fwd | + fwd.getName() = "Homer" and + fwd.getState() = "parent" + ) and + exists(GraphPathStateSearch::ForwardNode fwd | + fwd.getName() = "Clancy" and + fwd.getState() = "grandparent" + ) and + exists(GraphPathStateSearch::ForwardNode fwd | + fwd.getName() = "Jacquelin" and + fwd.getState() = "grandparent" + ) and + exists(GraphPathStateSearch::ForwardNode fwd | + fwd.getName() = "Mona" and + fwd.getState() = "grandparent" + ) and + exists(GraphPathStateSearch::ForwardNode fwd | + fwd.getName() = "Grandpa" and + fwd.getState() = "grandparent" + ) + then test.pass("All forward nodes from Bart have the correct state") + else test.fail("Some forward nodes from Bart have incorrect state") + } +} + +class TestBartForwardNodesDoNotContain 
extends Test, Case { + override predicate run(Qnit test) { + if + exists(GraphPathStateSearch::ForwardNode person | + not person.getName() = ["Bart", "Homer", "Marge", "Clancy", "Jacquelin", "Mona", "Grandpa"] + ) + then test.fail("Some unexpected forward nodes from Bart exist") + else test.pass("No forward nodes from Bart exist that shouldn't") + } +} + +class TestBartReverseNodesContain extends Test, Case { + override predicate run(Qnit test) { + if + forall(Person p | p.getName() = ["Bart", "Homer", "Grandpa"] | + p instanceof GraphPathStateSearch::ReverseNode + ) + then test.pass("All reverse nodes from Bart exist") + else test.fail("Some reverse nodes from Bart are missing") + } +} + +class TestBartReverseNodesState extends Test, Case { + override predicate run(Qnit test) { + if + exists(GraphPathStateSearch::ReverseNode rev | + rev.getName() = "Bart" and + rev.getState() = "child" + ) and + exists(GraphPathStateSearch::ReverseNode rev | + rev.getName() = "Homer" and + rev.getState() = "parent" + ) and + exists(GraphPathStateSearch::ReverseNode rev | + rev.getName() = "Grandpa" and + rev.getState() = "grandparent" + ) + then test.pass("All reverse nodes from Bart have the correct state") + else test.fail("Some reverse nodes from Bart have incorrect state") + } +} + +class TestBartReverseNodesDoNotContain extends Test, Case { + override predicate run(Qnit test) { + if + exists(GraphPathStateSearch::ReverseNode person | + not person.getName() = ["Bart", "Homer", "Grandpa"] + ) + then test.fail("Some unexpected reverse nodes from Bart exist") + else test.pass("No reverse nodes from Bart exist that shouldn't") + } +} + +class TestBartToGrandpaHasPath extends Test, Case { + override predicate run(Qnit test) { + if + exists(Person bart, Person grandpa | + bart.getName() = "Bart" and + grandpa.getName() = "Grandpa" and + GraphPathStateSearch::hasPath(bart, "child", grandpa, + "grandparent") + ) + then test.pass("Path from Bart to Grandpa exists") + else 
test.fail("Path from Bart to Grandpa does not exist") + } +} + +class TestGrandpaToBartForwardNodesContain extends Test, Case { + override predicate run(Qnit test) { + if + forall(Person p | p.getName() = ["Grandpa", "Homer", "Bart", "Maggie", "Lisa"] | + p instanceof GraphPathStateSearch::ForwardNode + ) + then test.pass("All forward nodes from Grandpa exist") + else test.fail("Some forward nodes from Grandpa are missing") + } +} + +class TestGrandpaToBartForwardNodesState extends Test, Case { + override predicate run(Qnit test) { + if + exists(GraphPathStateSearch::ForwardNode fwd | + fwd.getName() = "Grandpa" and + fwd.getState() = "parent" + ) and + exists(GraphPathStateSearch::ForwardNode fwd | + fwd.getName() = "Homer" and + fwd.getState() = "child" + ) and + exists(GraphPathStateSearch::ForwardNode fwd | + fwd.getName() = "Bart" and + fwd.getState() = "grandchild" + ) and + exists(GraphPathStateSearch::ForwardNode fwd | + fwd.getName() = "Maggie" and + fwd.getState() = "grandchild" + ) and + exists(GraphPathStateSearch::ForwardNode fwd | + fwd.getName() = "Lisa" and + fwd.getState() = "grandchild" + ) + then test.pass("All forward nodes from Grandpa have the correct state") + else test.fail("Some forward nodes from Grandpa have incorrect state") + } +} + +class TestGrandpaToBartForwardNodesDoNotContain extends Test, Case { + override predicate run(Qnit test) { + if + exists(GraphPathStateSearch::ForwardNode person | + not person.getName() = ["Grandpa", "Homer", "Bart", "Maggie", "Lisa"] + ) + then test.fail("Some unexpected forward nodes from Grandpa exist") + else test.pass("No forward nodes from Grandpa exist that shouldn't") + } +} + +class TestGrandpaToBartReverseNodesContain extends Test, Case { + override predicate run(Qnit test) { + if + forall(Person p | p.getName() = ["Grandpa", "Homer", "Bart"] | + p instanceof GraphPathStateSearch::ReverseNode + ) + then test.pass("All reverse nodes from Grandpa exist") + else test.fail("Some reverse nodes from 
Grandpa are missing") + } +} + +class TestGrandpaToBartReverseNodesState extends Test, Case { + override predicate run(Qnit test) { + if + exists(GraphPathStateSearch::ReverseNode rev | + rev.getName() = "Grandpa" and + rev.getState() = "parent" + ) and + exists(GraphPathStateSearch::ReverseNode rev | + rev.getName() = "Homer" and + rev.getState() = "child" + ) and + exists(GraphPathStateSearch::ReverseNode rev | + rev.getName() = "Bart" and + rev.getState() = "grandchild" + ) + then test.pass("All reverse nodes from Grandpa have the correct state") + else test.fail("Some reverse nodes from Grandpa have incorrect state") + } +} + +class TestGrandpaToBartReverseNodesDoNotContain extends Test, Case { + override predicate run(Qnit test) { + if + exists(GraphPathStateSearch::ReverseNode person | + not person.getName() = ["Grandpa", "Homer", "Bart"] + ) + then test.fail("Some unexpected reverse nodes from Grandpa exist") + else test.pass("No reverse nodes from Grandpa exist that shouldn't") + } +} + +class TestGrandpaToBartHasPath extends Test, Case { + override predicate run(Qnit test) { + if + exists(Person grandpa, Person bart | + grandpa.getName() = "Grandpa" and + bart.getName() = "Bart" and + GraphPathStateSearch::hasPath(grandpa, "parent", bart, + "grandchild") + ) + then test.pass("Path from Grandpa to Bart exists") + else test.fail("Path from Grandpa to Bart does not exist") + } +} diff --git a/test/qtil/locations/CustomPathProblem/CustomPathStateProblemTest.expected b/test/qtil/locations/CustomPathProblem/CustomPathStateProblemTest.expected new file mode 100644 index 0000000..2c94b99 --- /dev/null +++ b/test/qtil/locations/CustomPathProblem/CustomPathStateProblemTest.expected @@ -0,0 +1,35 @@ +WARNING: unused variable 'depth' (CustomPathStateProblemTest.ql:49,29-34) +edges +| test.cpp:17:8:17:9 | call to C1 | test.cpp:2:13:2:14 | C1 | | | +| test.cpp:22:12:22:13 | call to f1 | test.cpp:17:8:17:9 | call to C1 | | | +| test.cpp:33:5:33:6 | g1 | 
test.cpp:33:10:33:11 | call to f1 | | | +| test.cpp:33:10:33:11 | call to f1 | test.cpp:17:8:17:9 | call to C1 | | | +| test.cpp:34:5:34:6 | g2 | test.cpp:34:10:34:11 | call to f2 | | | +| test.cpp:34:10:34:11 | call to f2 | test.cpp:22:12:22:13 | call to f1 | | | +| test.cpp:37:4:37:5 | c1 | test.cpp:37:4:37:5 | call to C1 | | | +| test.cpp:37:4:37:5 | call to C1 | test.cpp:2:13:2:14 | C1 | | | +| test.cpp:38:5:38:6 | g5 | test.cpp:38:10:38:13 | call to C3 | | | +| test.cpp:38:10:38:13 | call to C3 | test.cpp:10:13:10:14 | C3 | | | +| test.cpp:39:5:39:6 | g6 | test.cpp:39:17:39:18 | call to f2 | | | +| test.cpp:39:17:39:18 | call to f2 | test.cpp:22:12:22:13 | call to f1 | | | +nodes +| test.cpp:2:13:2:14 | C1 | semmle.label | C1 | +| test.cpp:10:13:10:14 | C3 | semmle.label | C3 | +| test.cpp:17:8:17:9 | call to C1 | semmle.label | call to C1 | +| test.cpp:22:12:22:13 | call to f1 | semmle.label | call to f1 | +| test.cpp:33:5:33:6 | g1 | semmle.label | g1 | +| test.cpp:33:10:33:11 | call to f1 | semmle.label | call to f1 | +| test.cpp:34:5:34:6 | g2 | semmle.label | g2 | +| test.cpp:34:10:34:11 | call to f2 | semmle.label | call to f2 | +| test.cpp:37:4:37:5 | c1 | semmle.label | c1 | +| test.cpp:37:4:37:5 | call to C1 | semmle.label | call to C1 | +| test.cpp:38:5:38:6 | g5 | semmle.label | g5 | +| test.cpp:38:10:38:13 | call to C3 | semmle.label | call to C3 | +| test.cpp:39:5:39:6 | g6 | semmle.label | g6 | +| test.cpp:39:17:39:18 | call to f2 | semmle.label | call to f2 | +#select +| test.cpp:33:5:33:6 | g1 | test.cpp:33:5:33:6 | g1 | test.cpp:2:13:2:14 | C1 | Initialization of variable $@ calls constructor $@ at depth 3 | test.cpp:33:5:33:6 | g1 | g1 | test.cpp:2:13:2:14 | C1 | C1 | +| test.cpp:34:5:34:6 | g2 | test.cpp:34:5:34:6 | g2 | test.cpp:2:13:2:14 | C1 | Initialization of variable $@ calls constructor $@ at depth 4 | test.cpp:34:5:34:6 | g2 | g2 | test.cpp:2:13:2:14 | C1 | C1 | +| test.cpp:37:4:37:5 | c1 | test.cpp:37:4:37:5 | c1 | 
test.cpp:2:13:2:14 | C1 | Initialization of variable $@ calls constructor $@ at depth 2 | test.cpp:37:4:37:5 | c1 | c1 | test.cpp:2:13:2:14 | C1 | C1 | +| test.cpp:38:5:38:6 | g5 | test.cpp:38:5:38:6 | g5 | test.cpp:10:13:10:14 | C3 | Initialization of variable $@ calls constructor $@ at depth 2 | test.cpp:38:5:38:6 | g5 | g5 | test.cpp:10:13:10:14 | C3 | C3 | +| test.cpp:39:5:39:6 | g6 | test.cpp:39:5:39:6 | g6 | test.cpp:2:13:2:14 | C1 | Initialization of variable $@ calls constructor $@ at depth 4 | test.cpp:39:5:39:6 | g6 | g6 | test.cpp:2:13:2:14 | C1 | C1 | diff --git a/test/qtil/locations/CustomPathProblem/CustomPathStateProblemTest.ql b/test/qtil/locations/CustomPathProblem/CustomPathStateProblemTest.ql new file mode 100644 index 0000000..f40179f --- /dev/null +++ b/test/qtil/locations/CustomPathProblem/CustomPathStateProblemTest.ql @@ -0,0 +1,99 @@ +/** + * @name Custom Path State Problem Example + * @description This example demonstrates how to define a custom path problem in C++ using Qtil. It + * identifies paths from top-level variables to constructors that are called during their + * initialization. Additionally, it tracks the depth of the search as a state. + * @id qtil-example-custom-path-problem + * @severity info + * @kind path-problem + */ + +import cpp +import cpp as cpp +import qtil.locations.Locatable +import qtil.locations.CustomPathStateProblem +import CustomPathStateProblemCpp + +/** Defines cpp location behavior by using `cpp::Locatable` as the `Locatable` class; this will be moved to qtil.cpp eventually. */ +module CustomPathStateProblemCpp { + module ElementConfig implements LocatableConfig { + class Locatable = cpp::Locatable; + } + + import PathStateProblem +} + +/** + * Defines a custom path problem configuration for identifying paths from top-level variables to + * constructors that are called during their initialization.
+ */ +module CallGraphPathProblemConfig implements CustomPathStateProblemConfigSig { + /** + * Since we are tracking flow from variable initialization to constructor calls, that means the + * nodes in our path problem will be variables (roots), function calls (edges), and constructors + * (end nodes). + */ + class Node extends Locatable { + Node() { + this instanceof Function or this.(Variable).isTopLevel() or this instanceof FunctionCall + } + } + + class State = int; // Track search depth + + /** Start searching from variable nodes, at depth 0 */ + predicate start(Node n, int depth) { n instanceof Variable and depth = 0 } + + /** If we reach a constructor, we have identified "problematic" flow from a variable; `depth` is not constrained here */ + bindingset[depth] + predicate end(Node n, int depth) { + exists(Function f, Class c | + n = f and + c.getAConstructor() = f + ) + } + + bindingset[depth1] + bindingset[depth2] + predicate edge(Node a, int depth1, Node b, int depth2) { + depth2 = depth1 + 1 and + ( + // The `depth2 = depth1 + 1` conjunct above increments the depth for every edge kind below. + // Add an edge from variables to the function calls in that variable's initializer. + exists(Variable var, Expr initializer, FunctionCall fc | + var.getInitializer().getExpr() = initializer and + fc.getParent*() = initializer and + a = var and + b = fc + ) + or + // Supposing we have reached a function call to some function `mid()`, then the next step in + // the path problem will be one of the function calls in `mid()`. + exists(FunctionCall fc, Function mid, FunctionCall next | + mid = fc.getTarget() and + next.getEnclosingFunction() = mid and + a = fc and + b = next + ) + or + // Add an edge from function calls to constructors, which are the end nodes. + exists(FunctionCall fc, Function endFunc | + fc.getTarget() = endFunc and + end(endFunc, 0) and // `end` does not use its depth argument, so the 0 here is arbitrary + a = fc and + b = endFunc + ) + ) + } +} + +// Import the custom path problem configuration and define the problem.
+// +// This automatically generates the `nodes` and `edges` predicates based on the configuration that +// make the path traceable for users. +import CustomPathStateProblem + +from Variable var, Function ctor, int depth +where problem(var, _, ctor, depth) // This finds paths from variables to constructors +select var, var, ctor, "Initialization of variable $@ calls constructor $@ at depth " + depth, var, + var.getName(), ctor, ctor.getName()