Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: feat: add-analysis #30

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ pom.xml.asc
.clj-kondo/
.cpcache/
.lsp/
.cljs_node_repl/
node_modules/
out/
73 changes: 73 additions & 0 deletions src/datalog/analysis.cljc
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
(ns datalog.analysis
(:require [clojure.set :as set]
[datalog.analysis.graph :as ag]
[datalog.analysis.mode :as am]
[datalog.parser.impl :as impl]
[datalog.parser.impl.util :as util]
[datalog.parser.type :as t]
[medley.core :as m]))

;; cycles in rules

(defn branch-calls-rule [rule-name branch]
(->> (:clauses branch)
(tree-seq seq? identity)
(m/find-first #(and (symbol? %2)
(= (name %2) rule-name)))))

(defn rule-calls-rule [rule1 rule-name2]
(m/find-first (partial branch-calls-rule rule-name2)
(:branches rule1)))

(defn recursive-rules
"Returns names of rules with cycles"
[rules]
(let [dependencies (->> rules
(map #(vector (:name %)
(->> (keys rules)
(filter (partial rule-calls-rule %))
set)))
(apply hash-map))]

(filter #(ag/has-cycle? dependencies %)
(keys dependencies))))

;; range restriction

(defn check-range-restriction
"Checks range restriction of rules"
[parsed-rules]
(run! (fn [[rule-name branches]]
(for [{:keys [vars clauses]} branches
:let [missing (set/difference
(set (impl/flatten-rule-vars vars))
(t/collect-vars #{} clauses))]
:when (seq missing)]
(util/raise "Rule body must contain all vars from rule head"
{:error :analyzer/range-restriction, :rule rule-name :clauses clauses :missing missing})))
parsed-rules))


;; modes

(defn well-modable
"Determines moding options for clauses in a bottom.up fashion"
([parsed-query] (well-modable parsed-query {}))
([{:keys [qwhere qin]} parsed-rules]
(let [in-vars qin
rule-modes (am/infere-rule-modes parsed-rules)
where-modes (am/infere-where-modes qwhere rule-modes)
optional-obligations (:obligations (am/obligation-map where-modes))]
(when-not (m/find-first #(set/subset? % in-vars) optional-obligations)
(util/raise (str "Query is not satisfiable. "
"Possible obligations are: " optional-obligations)))
where-modes)))

(defn well-moded
"Simulates the sequential execution of clauses to determine well-modedness.
Returns sets of eventual bindings."
([parsed-query] (well-moded parsed-query {}))
([{:keys [qwhere qin]} parsed-rules]
(let [in-vars qin
rule-modes (am/infere-rule-modes parsed-rules)]
(am/sequential-bindings rule-modes in-vars qwhere))))
24 changes: 24 additions & 0 deletions src/datalog/analysis/graph.cljc
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
(ns datalog.analysis.graph
(:require [clojure.data.priority-map :refer [priority-map]]
[medley.core :as m]))

(defn dijkstra
"Computes shortest distances for graph with dijkstra algorithm using a priority queue.
Complexity is O(|E|+|V|lg(|V|)).
- vertex: vertex to compute distances for
- neighbours: function or map containing the list of neighbouring vertices"
[vertex neighbours]
(loop [queue (priority-map vertex 0)
distances {}]
(if-let [[vertex distance] (peek queue)]
(let [dist (-> (neighbours vertex)
(m/remove-keys (keys distances))
(m/map-vals (partial + distance)))]
(recur (merge-with min (pop queue) dist)
(assoc distances vertex distance)))
distances)))


(defn has-cycle? [graph start-vertex]
(let [reachable-nodes (keys (dijkstra start-vertex graph))]
(m/find-first #(graph % start-vertex) reachable-nodes)))
281 changes: 281 additions & 0 deletions src/datalog/analysis/mode.cljc
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
(ns datalog.analysis.mode
(:require [clojure.math.combinatorics :as c]
[clojure.set :as set]
[clojure.spec.alpha :as s]
[clojure.walk :as walk]
[datalog.analysis.proto :as ap]
[datalog.parser.impl :as impl]
[datalog.parser.impl.proto :as p]
[datalog.parser.impl.util :as util]
[datalog.parser.type :as t]
[medley.core :as m]
[spec-tools.data-spec :as ds]))

(defn set-of [pred] (s/every pred :kind set?))
(defn vec-of [pred] (s/every pred :kind vector?))

(def SModedClause
(ds/spec
{:name ::moded-clause
:keys-default ds/req
:spec {:form (partial satisfies? p/ITraversable)
:vars (vec-of symbol?)
:modes (set-of (set-of :symbol))}})) ;; TODO: probably set of symbols is enough

;; well modable

(defn min-modes
"Apply min to mode-set"
[modes]
(reduce (fn [mm new-mode] (if-let [supersets (seq (filter (partial set/subset? new-mode) mm))]
(conj (set/difference mm supersets) new-mode)
(if (seq (filter (partial set/superset? new-mode) mm))
mm
(conj mm new-mode))))
#{(first modes)}
(rest modes)))

(defn translate-modes
"Returns translations of a mode set.
Throws if a mode set is invalid in this context.
An empty mode set indicates an unsatisfiable clause in the current context."
[new-vars {:keys [form modes vars] :as _moded-clause}]
(let [index-translation (->> vars
(map-indexed (fn [idx old-var] [idx (.indexOf new-vars old-var)]))
(apply hash-map))
new-modes (map #(replace index-translation %) modes) ;; TODO: probably only 1 mode, valid or invalid
{valid false
invalid true} (group-by #(contains? % -1) new-modes)]
(when (seq invalid)
(util/raise "Invalid modes in this context: " invalid
{:form form
:new-vars new-vars
:new-modes new-modes}))
{:form form
:vars new-vars
:modes (set valid)}))

(defn join-domains
"Join mode domains of different clauses"
([moded-clauses] (join-domains moded-clauses nil))
([moded-clauses bindable-vars]
(let [all-vars (->> moded-clauses (map :vars) set)
new-vars (vec (if bindable-vars
(set/intersection all-vars bindable-vars)
all-vars))]
(map (partial translate-modes new-vars)
moded-clauses))))

(defn merge-clauses
"Join modes of clauses to require the same entrance conditions"
[moded-clauses]
(let [new-clauses (join-domains moded-clauses)]
{:form (t/->AndP (map :form moded-clauses))
:vars (:vars (first new-clauses))
:modes (->> new-clauses
(map :modes)
(reduce (fn [acc-modes new-modes]
(for [modes1 acc-modes
modes2 new-modes]
(set/union modes1 modes2)))
[])
set
min-modes)}))

(defn join-or-clauses
"Join alternative clauses (same head predicate)"
[orig bindable-vars moded-clauses]
(let [new-clauses (join-domains bindable-vars moded-clauses)]
{:form orig
:vars (:vars (first new-clauses))
:modes (->> new-clauses
(map :modes)
(reduce (fn [acc-modes new-modes]
(for [modes1 acc-modes
modes2 new-modes]
(set/union modes1 modes2)))
[])
set
min-modes)
:alts new-clauses}))

(defn remove-bound [obligations newly-bound]
(->> obligations
(map #(set/difference % newly-bound))
(filter seq)
set))

(defn greedy-obligation-paths
"Calculates minimal orderings of clauses of a single level.
Returns nil on failure.
Returns sequences of clauses with last element containing the obligation set."
[bindable-vars bound-vars obligation-acc edges]
(let [{natural true
non-natural false} (group-by #(contains? (:obligations %) #{})
edges)
second-choices (filter (fn [edge] (seq (m/find-first (fn [obligation] (set/subset? obligation bindable-vars))
(:obligations edge))))
non-natural)]
(cond
(seq natural) ;; natural edges with empty / trivial obligation
(let [joined (merge-clauses (map :origin natural))
newly-bound (set/difference (:vars joined) bound-vars)]
(->> non-natural
(map #(update % :obligations remove-bound newly-bound))
(greedy-obligation-paths bindable-vars
(set/union bound-vars newly-bound)
obligation-acc)
(filter identity)
(map #(cons joined %))))

(empty? non-natural) ;; terminal vertex, successful
[[{:obligation obligation-acc}]]

(empty? second-choices) ;; unsatisfiable path
nil

:else ;; picks from non-natural edges with satifiable obligation
(->> second-choices
(map (fn [{:keys [obligations var-set origin] :as picked}]
(let [newly-bound (set/difference var-set bound-vars)
updated-edges (->> non-natural
(filter #(= picked %))
(map #(update % :obligations remove-bound newly-bound)))]
(->> obligations
(mapcat (fn [obligation]
(->> updated-edges
(greedy-obligation-paths bindable-vars
(set/union bound-vars newly-bound)
(conj obligation-acc obligation))
(filter identity)
(map #(cons origin %)))))))))))))

(defn obligation-map [{:keys [modes vars] :as moded-clause}]
{:obligations (walk/postwalk-replace vars modes)
:var-set (set vars)
:origin moded-clause})

(defn min-reorder
"Finds min-reorderings and modes for clauses in a common domain"
[orig bindable-vars moded-clauses]
(let [paths (->> moded-clauses
(map obligation-map)
(greedy-obligation-paths bindable-vars #{} #{}))
_ (when-not paths
(util/raise "No satisfiable reordering for clause set"
{:clauses (mapv :clause moded-clauses)
:bindable-vars bindable-vars}))
vars (vec bindable-vars)
obl->mode (set/map-invert vars)
alts (map (fn [path] {:form (vec (butlast path))
:modes #{(replace obl->mode (:obligation (last path)))}})
paths)]
{:form orig
:vars vars
:modes (set (map (comp first :modes) alts))
:alts alts})) ;; TODO: probably no alts needed as every alt needs to be applicable

(declare infere-and-context-modes infere-or-context-modes)

(defn infere-clause-mode [bindable-vars rule-modes clause]
(cond
(satisfies? ap/IInitialModing clause) (ap/-initial-modes clause)
(instance? t/RuleExpr clause) (rule-modes (:name clause))
(instance? t/Or clause) (infere-or-context-modes clause (:rule-vars clause) rule-modes (:clauses clause))
(instance? t/And clause) (infere-and-context-modes clause bindable-vars rule-modes (:clauses clause))
(instance? t/Not clause) (infere-and-context-modes clause (:vars clause) rule-modes (:clauses clause))))


(defn infere-or-context-modes
"Find modes of all clauses and infere min modes of branches"
[orig bindable-vars rule-modes moded-clauses]
(->> moded-clauses
(map #(infere-clause-mode bindable-vars rule-modes %))
(join-or-clauses orig bindable-vars)))

(defn infere-and-context-modes
"Find modes of all clauses and infere min modes of context by reordering"
[orig bindable-vars rule-modes moded-clauses]
(->> moded-clauses
(map #(infere-clause-mode bindable-vars rule-modes %))
(partial min-reorder orig bindable-vars)))

(defn rule-vars [rule]
(-> rule :branches first :vars impl/flatten-rule-vars vec))

(defn infere-rule-modes [rules]
(let [rule-map (m/indexed-map rules :name)]
(loop [modes (m/map-vals #(set (set (range (count (rule-vars %)))))
rules)]
(let [new-modes (m/map-vals (fn [{:keys [branches] :as rule}]
(let [vars (set (rule-vars rule))]
(->> branches
(map #(infere-and-context-modes % vars modes (:clauses %)))
(infere-or-context-modes rule vars modes))))
rule-map)]
(if (= modes new-modes)
modes
(recur new-modes))))))

(defn infere-where-modes [qwhere rule-modes]
(let [vars (t/collect-vars #{} qwhere)]
{:form qwhere
:vars vars
:modes (infere-and-context-modes (t/And qwhere) vars rule-modes qwhere)}))

;; well moded

(defn join-binding-options [options1 options2]
(->> (c/cartesian-product options1 options2)
(map (partial apply set/union))
(reduce into #{})))

(defn valid-bindings [binding-options required]
(->> binding-options
(map #(set/difference required %))
(remove empty?)))

(defn resolve-rule-options ;; return binding options
"Returns vars that can be bound by any of the options"
[binding-options rule-mode clause]
(let [moded-clause (assoc rule-mode :vars (rule-vars clause))
{:keys [var-set obligations]} (obligation-map moded-clause)
options (map #(let [valid-options (valid-bindings binding-options (:required %))]
(hash-map :required %
:valid-options valid-options
:out (set/difference var-set %)))
obligations)
valid (filter (comp seq :valid-options) options)]
(if (empty? valid)
(util/raise "No satisfiable mode found for '" clause
{:binding-options binding-options
:required-options (set (map :required options))})
(->> valid
(map #(join-binding-options (:valid-options %)
[(set/difference var-set (:required %))]))
set))))

(defn resolve-options [binding-options required-vars bindings clause]
(let [valid-options (valid-bindings binding-options required-vars)]
(if (empty? valid-options)
(util/raise "No satisfiable mode found for '" clause
{:binding-options binding-options
:required required-vars})
(join-binding-options (set valid-options) [bindings]))))

(declare sequential-bindings)

(defn resolve-clause [rule-modes binding-options clause]
(cond
(satisfies? ap/IInitialModing clause) (resolve-options binding-options (ap/-required clause) (ap/-binds clause) clause)
(instance? t/RuleExpr clause) (resolve-rule-options binding-options (rule-modes (:name clause)) clause)
(instance? t/Or clause) (set (mapcat (partial resolve-clause rule-modes binding-options)
(:clauses clause)))
(instance? t/And clause) (sequential-bindings rule-modes binding-options (:clauses clause))
(instance? t/Not clause) (do (sequential-bindings rule-modes #{} (:clauses clause))
(resolve-options binding-options (:vars clause) #{} clause))))

(defn sequential-bindings
"Computes the sets of bindings returned by executing the queries sequentially"
[rule-modes binding-options clauses]
(reduce (partial resolve-clause rule-modes) binding-options clauses))
Loading