From 1ecb45a67eaec9bf1278a0f2b50f376deb9abcfb Mon Sep 17 00:00:00 2001 From: Gunther Klessinger Date: Fri, 10 Feb 2023 23:53:25 +0100 Subject: [PATCH] feat: deep2, deep3 lookup perf improvements - deep2 creates item and attr getters - deep3 creates a func via eval 4 times speed improvements seen --- .README.tmpl.md | 2 +- .pip_up.sh | 2 +- README.md | 524 +++++++++++++++++++++----------------- pycond.py | 181 ++++++++++--- pyproject.toml | 4 +- tests/test_getter_perf.py | 40 +++ tests/test_perf.py | 45 ++-- tests/test_pycond.py | 45 +++- tests/test_rx.py | 23 +- tests/test_tutorial.py | 340 +++++++++++++++---------- 10 files changed, 749 insertions(+), 457 deletions(-) create mode 100644 tests/test_getter_perf.py diff --git a/.README.tmpl.md b/.README.tmpl.md index 514bc39..060a72b 100644 --- a/.README.tmpl.md +++ b/.README.tmpl.md @@ -1,7 +1,7 @@ --- author: gk -version: 20230211 +version: 20230212 --- diff --git a/.pip_up.sh b/.pip_up.sh index c295278..a9af2e9 100755 --- a/.pip_up.sh +++ b/.pip_up.sh @@ -25,7 +25,7 @@ cd "$here" || exit 1 clean rm -f README.md pytest tests || exit 1 -git commit --amend -am 'links auto replaced' +lazygit #unset NOLINKREPL #git commit -am 'pre_pypi_upload' # to have the commit hash for the links #slt="https://github.com/axiros/DevApps/blob/`git rev-parse HEAD`" diff --git a/README.md b/README.md index f294210..8b59baa 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ --- author: gk -version: 20230211 +version: 20230212 --- @@ -29,54 +29,56 @@ version: 20230211 - [Parsing](#parsing) - [Building](#building) - [Structured Conditions](#structured-conditions) -- [Evaluation](#evaluation) + - [Evaluation](#evaluation) - [Default Lookup](#default-lookup) - [Passing State](#passing-state) - - [Deep Lookup / Nested State / Lists](#deep-lookup-nested-state-lists) - - [Prefixed Data](#prefixed-data) - - [Attributes Access](#attributes-access) - - [Custom Lookup And Value Passing](#custom-lookup-and-value-passing) - - [Lazy Evaluation](#lazy-evaluation) -- [Details](#details) - - [Debugging Lookups](#debugging-lookups) - - [Enabling/Disabling of Branches](#enabling-disabling-of-branches) - - [Building Conditions From Text](#building-conditions-from-text) - - [Grammar](#grammar) - - [Atomic Conditions](#atomic-conditions) - - [Condition Operators](#condition-operators) - - [Using Symbolic Operators](#using-symbolic-operators) - - [Extending Condition Operators](#extending-condition-operators) - - [Negation `not`](#negation-not) - - [Reversal `rev`](#reversal-rev) - - [Wrapping Condition Operators](#wrapping-condition-operators) - - [Global Wrapping](#global-wrapping) - - [Condition Local Wrapping](#condition-local-wrapping) - - [Combining Operations](#combining-operations) - - [Nesting](#nesting) - - [Tokenizing Details](#tokenizing-details) - - [Functioning](#functioning) - - [Separator `sep`](#separator-sep) - - [Apostrophes](#apostrophes) - - [Escaping](#escaping) - - [Building](#building) - - [Autoconv: Casting of values into python simple types](#autoconv-casting-of-values-into-python-simple-types) - - [Context On Demand](#context-on-demand) -- [Lookup Providers](#lookup-providers) - - [Accepted Signatures](#accepted-signatures) - - [Parametrized Lookup Functions](#parametrized-lookup-functions) - - [Namespace](#namespace) - - [Caching](#caching) - - [Extensions](#extensions) -- [Named Conditions: Qualification](#named-conditions-qualification) - - [Options](#options) - - [Partial Evaluation](#partial-evaluation) -- [Streaming Data](#streaming-data) - - 
[Filtering](#filtering) - - [Streaming Classification](#streaming-classification) - - [Selective Classification](#selective-classification) - - [Treating of Booleans (Conditions, Not Names)](#treating-of-booleans-conditions-not-names) - - [Asyncronous Operations](#asyncronous-operations) - - [Asyncronous Filter](#asyncronous-filter) + - [Deep Lookup / Nested State / Lists](#deep-lookup-nested-state-lists) + - [Lookup Performance: Prebuilt Deep Getters](#lookup-performance-prebuilt-deep-getters) + - [Best Practices](#best-practices) + - [Prefixed Data](#prefixed-data) + - [Attributes Access](#attributes-access) + - [Custom Lookup And Value Passing](#custom-lookup-and-value-passing) + - [Lazy Evaluation](#lazy-evaluation) + - [Condition Operators (Comparators)](#condition-operators-comparators) + - [Using Symbolic Operators](#using-symbolic-operators) + - [Extending Condition Operators](#extending-condition-operators) + - [Negation `not`](#negation-not) + - [Reversal `rev`](#reversal-rev) + - [Wrapping Condition Operators](#wrapping-condition-operators) + - [Global Wrapping](#global-wrapping) + - [Condition Local Wrapping](#condition-local-wrapping) + - [Combining Operations](#combining-operations) + - [Details](#details) + - [Debugging Lookups](#debugging-lookups) + - [Enabling/Disabling of Branches](#enabling-disabling-of-branches) + - [Building Conditions From Text](#building-conditions-from-text) + - [Grammar](#grammar) + - [Atomic Conditions](#atomic-conditions) + - [Nesting](#nesting) + - [Tokenizing Details](#tokenizing-details) + - [Functioning](#functioning) + - [Separator `sep`](#separator-sep) + - [Apostrophes](#apostrophes) + - [Escaping](#escaping) + - [Building](#building) + - [Autoconv: Casting of values into python simple types](#autoconv-casting-of-values-into-python-simple-types) + - [Context On Demand](#context-on-demand) + - [Lookup Providers](#lookup-providers) + - [Accepted Signatures](#accepted-signatures) + - [Parametrized Lookup Functions](#parametrized-lookup-functions) + - [Namespace](#namespace) + - [Caching](#caching) + - [Extensions](#extensions) + - [Named Conditions: Qualification](#named-conditions-qualification) + - [Options](#options) + - [Partial Evaluation](#partial-evaluation) + - [Streaming Data](#streaming-data) + - [Filtering](#filtering) + - [Streaming Classification](#streaming-classification) + - [Selective Classification](#selective-classification) + - [Treating of Booleans (Conditions, Not Names)](#treating-of-booleans-conditions-not-names) + - [Asyncronous Operations](#asyncronous-operations) + - [Asyncronous Filter](#asyncronous-filter) @@ -171,6 +173,7 @@ compared with handcrafted list comprehensions. ## Parsing pycond parses the condition expressions according to a set of constraints given to the parser in the `tokenizer` function. + The result of the tokenizer is given to the builder. @@ -181,7 +184,6 @@ import pycond as pc expr = '[a eq b and [c lt 42 or foo eq bar]]' cond = pc.to_struct(pc.tokenize(expr, sep=' ', brkts='[]')) print('filter:', cond) - # test: data = [ {'a': 'b', 'c': 1, 'foo': 42}, @@ -201,9 +203,9 @@ matching: [{'a': 'b', 'c': 1, 'foo': 42}] ## Building -After parsing the builder is assembling a nested set of operator functions, +After parsing, the builder is assembling a nested set of operator functions, combined via combining operators. The functions are partials, i.e. 
not yet -evaluated but information about the necessary keys is already available: +evaluated - but information about the necessary keys is already available: @@ -215,7 +217,7 @@ assert f(state={'foo': 'bar'}) == True -Note that the `make_filter` function is actually a convencience function for +Note: The `make_filter` function is actually a convencience function for `parse_cond`, ignoring that meta information and calling with `state=` @@ -238,14 +240,16 @@ assert pc.pycond(cond_as_json)(state={'a': 'b'}) == True -# Evaluation +## Evaluation -The result of the builder is a 'pycondition', which can be run many times against varying state of the system. +The result of the builder is a 'pycondition', i.e. a function which can be run many times against varying state of the system. How state is evaluated is customizable at build and run time. ## Default Lookup -The default is to get lookup keys within expressions from an initially empty `State` dict within the module - which is *not* thread safe, i.e. not to be used in async or non cooperative multitasking environments. +"Lookup" denotes the process of deriving the actual values to evaluate, from a given state. Can be simple gets, getattrs, walks into the structure - or arbitrary, via custom lookup functions. + +The default is to *get* lookup keys within expressions from an initially empty `State` dict within the module. This is *not* thread safe, i.e. not to be used in async or non cooperative multitasking environments. @@ -261,7 +265,7 @@ assert f() == True ## Passing State -Use the state argument at evaluation: +Using a state argument at evaluation *is* thread safe: ```python @@ -269,7 +273,7 @@ assert pc.pycond('a gt 2')(state={'a': 42}) == True assert pc.pycond('a gt 2')(state={'a': -2}) == False ``` -### Deep Lookup / Nested State / Lists +## Deep Lookup / Nested State / Lists You may supply a path seperator for diving into nested structures like so: @@ -291,15 +295,57 @@ c = [[['a', 'b', 0, 'c'], 'eq', 1], 'and', 'a'] f, nfos = pc.parse_cond(c) # sorting order for keys: tuples at end, sorted by len, rest default py sorted: assert f(state=m) == True and nfos['keys'] == ['a', ('a', 'b', 0, 'c')] -print(nfos) ``` -Output: + +- The structure may also contain objects, then we use getattribute to get to the next value. + +- `deep="."` is actually just convience notation for supplying the following "lookup function" (see below): + + + +```python +m = {'a': {'b': [{'c': 1}]}} +assert pc.pycond('a.b.0.c', lookup=pc.state_get_deep)(state=m) == True ``` -{'keys': ['a', ('a', 'b', 0, 'c')]} + + +### Lookup Performance: Prebuilt Deep Getters + +The value lookup within nested structures can be stored into item and attribute getters (or , alternatively, an evaluated synthesized lookup function), built, when the first item has a matching structure. + +- Upside: [Performance](./test/test_getter_perf.py) is a few times better compared to when the structure of items is explored each time, as with the 'deep' parameter. +- Downside: The lookup remains as built for the first structurely matching item. Schematic changes like from a key within a dict to an attribute will not except but deliver always False for the + actual condition value matching. + +- `pycond.Getters.state_get_deep2`: A list of item and attribute getters is built at first successfull lookup evaluation. +- `pycond.Getters.state_get_evl`: An expression like "lambda state=state['a'].b[0]['c']" is built and evaluated, then applied to the items. 
+ - Fastest way to get to the values at evaluation time. + - Security: Round brackets within key names are forbidden and deliver always false - but an eval is an eval i.e. potentially evil. + +These two additional "deep" lookup functions are conveniently made accessible by supplying a `deep2` or `deep3` argument: + + + +```python +m = {'a': {'b': [{'c': 1}]}} +# 3 times faster than deep. Safe. +assert pc.pycond('a.b.0.c', deep2='.')(state=m) == True +# 4 times faster than deep. Eval involved. +assert pc.pycond('a.b.0.c', deep3='.')(state=m) == True ``` -## Prefixed Data +The evaluation results for the keys are cached. The cache is cleared after 1Mio entries but can be cleared manually via `pc.clear_caches()` any time before that. + +### Best Practices + +- Lookup keys change all the time, not many items checked for specific key: Use `deep` +- Many items to be checked with same keys, input from untrusted users: Use `deep2` +- Many items to be checked with same keys, input from trusted users: Use `deep3` + + + +## Prefixed Data When data is passed through processing pipelines, it often is passed with headers. So it may be useful to pass a global prefix to access the payload like so: @@ -310,7 +356,7 @@ m = {'payload': {'b': [{'c': 1}], 'id': 123}} assert pc.pycond('b.0.c', deep='.', prefix='payload')(state=m) == True ``` -## Attributes Access +## Attributes Access Since version 20210221 we try attributes when objects are not dicts: @@ -325,11 +371,8 @@ cond = [['obj.val.a', 'eq', 'b']] assert pc.pycond(cond, deep='.', prefix='payload')(state=m) == True ``` -Perf Tip: When you have deep nested class or object hirarchies, then a custom lookup -function will be faster than pycond's default lookup, which splits the key into parts, -then works its way in via getitem, getattr, from root. -## Custom Lookup And Value Passing +## Custom Lookup And Value Passing You can supply your own function for value acquisition. @@ -360,9 +403,9 @@ user check. locals: {'k': 'last_host', 'v': 'host', 'req': {'host': 'somehost'}, ``` > as you can see in the example, the state parameter is just a convention -for `pyconds'` [default lookup function][pycond.py#195]. +for `pyconds'` [title: default lookup function, fmatch:pycond.py, lmatch:def state_get] < SRC > . -## Lazy Evaluation +## Lazy Evaluation This is avoiding unnecessary calculations in many cases: @@ -405,95 +448,11 @@ Output: ['a', 'baz'] ``` -Remember that all keys occurring in a condition (which may be provided by the user at runtime) are returned by the condition parser. Means that building of evaluation contexts [can be done](#context-on-demand-and-lazy-evaluation), based on the data actually needed and not more. - -# Details - -## Debugging Lookups - -pycond provides a key getter which prints out every lookup. - - -```python -f = pc.pycond('[[a eq b] or foo eq bar] or [baz eq bar]', lookup=pc.dbg_get) -assert f(state={'foo': 'bar'}) == True -``` -Output: - -``` -Lookup: a b -> None -Lookup: foo bar -> bar -``` - -## Enabling/Disabling of Branches - -Insert booleans like shown: - - -```python -f = pc.pycond(['foo', 'and', ['bar', 'eq', 1]]) -assert f(state={'foo': 1}) == False -f = pc.pycond(['foo', 'and', [True, 'or', ['bar', 'eq', 1]]]) -assert f(state={'foo': 1}) == True -``` - -## Building Conditions From Text - -Condition functions are created internally from structured expressions - -but those are [hard to type](#lazy-dynamic-context-assembly), -involving many apostropies. 
- -The text based condition syntax is intended for situations when end users -type them into text boxes directly. - -### Grammar - -Combine atomic conditions with boolean operators and nesting brackets like: - -``` -[ ] [ [ .... -``` - -### Atomic Conditions - -``` -[not] [ [rev] [not] ] -``` -- When just `lookup_key` is given, then `co` is set to the `truthy` function: -```python -def truthy(key, val=None): - return operatur.truth(k) -``` - -so such an expression is valid and True: - - - -```python -pc.State.update({'foo': 1, 'bar': 'a', 'baz': []}) -assert pc.pycond('[ foo and bar and not baz]')() == True -``` - -- When `not lookup_key` is given, then `co` is set to the `falsy` - function: - - - -```python -m = {'x': 'y', 'falsy_val': {}} -# normal way -assert pc.pycond(['foo', 'eq', None])(state=m) == True -# using "not" as prefix: -assert pc.pycond('not foo')(state=m) == True -assert pc.pycond(['not', 'foo'])(state=m) == True -assert pc.pycond('not falsy_val')(state=m) == True -assert pc.pycond('x and not foo')(state=m) == True -assert pc.pycond('y and not falsy_val')(state=m) == False -``` +Remember that all keys occurring in a condition(which may be provided by the user at runtime) are returned by the condition parser. Means that building of evaluation contexts[can be done]( # context-on-demand-and-lazy-evaluation), based on the data actually needed and not more. -## Condition Operators +## Condition Operators (Comparators) -All boolean [standardlib operators](https://docs.python.org/2/library/operator.html) +All boolean[standardlib operators](https://docs.python.org/2/library/operator.html) are available by default: @@ -569,7 +528,7 @@ for k in 'nr', 'str': -### Using Symbolic Operators +### Using Symbolic Operators By default pycond uses text style operators. @@ -596,7 +555,7 @@ except: > Operator namespace(s) should be assigned at process start, they are global. -### Extending Condition Operators +### Extending Condition Operators @@ -607,7 +566,7 @@ assert pc.pycond('a maybe b')() in (True, False) ``` -### Negation `not` +### Negation `not` Negates the result of the condition operator: @@ -620,7 +579,7 @@ assert pc.pycond('foo not eq abc')() == False ``` -### Reversal `rev` +### Reversal `rev` Reverses the arguments before calling the operator @@ -634,9 +593,10 @@ assert pc.pycond('foo rev contains abc')() == True > `rev` and `not` can be combined in any order. -### Wrapping Condition Operators +### Wrapping Condition Operators + +#### Global Wrapping -#### Global Wrapping You may globally wrap all evaluation time condition operations through a custom function: @@ -662,7 +622,7 @@ pc.ops_use_symbolic_and_txt() You may compose such wrappers via repeated application of the `run_all_ops_thru` API function. -#### Condition Local Wrapping +### Condition Local Wrapping This is done through the `ops_thru` parameter as shown: @@ -683,7 +643,7 @@ assert f() == True > Using `ops_thru` is a good way to debug unexpected results, since you > can add breakpoints or loggers there. -## Combining Operations +### Combining Operations You can combine single conditions with @@ -697,21 +657,106 @@ The combining functions are stored in `pycond.COMB_OPS` dict and may be extended > Do not use spaces for the names of combining operators. The user may use them but they are replaced at before tokenizing time, like `and not` -> `and_not`. -### Nesting +## Details + +### Debugging Lookups + +pycond provides a key getter which prints out every lookup. 
+ + +```python +f = pc.pycond('[[a eq b] or foo eq bar] or [baz eq bar]', lookup=pc.dbg_get) +assert f(state={'foo': 'bar'}) == True +``` +Output: + +``` +Lookup: a b -> None +Lookup: foo bar -> bar +``` + +### Enabling/Disabling of Branches + +Insert booleans like shown: + + +```python +f = pc.pycond(['foo', 'and', ['bar', 'eq', 1]]) +assert f(state={'foo': 1}) == False +f = pc.pycond(['foo', 'and', [True, 'or', ['bar', 'eq', 1]]]) +assert f(state={'foo': 1}) == True +``` + +### Building Conditions From Text + +Condition functions are created internally from structured expressions - +but those are[hard to type]( # lazy-dynamic-context-assembly), +involving many apostropies. + +The text based condition syntax is intended for situations when end users +type them into text boxes directly. + +#### Grammar + +Combine atomic conditions with boolean operators and nesting brackets like: + +``` +[< atom1 > < and | or | and not|... > ] < and|or... > [ [ < atom3 > .... +``` + +#### Atomic Conditions + +``` +[not] < lookup_key > [[rev] [not] < condition operator (co) > ] +``` +- When just `lookup_key` is given, then `co` is set to the `truthy` function: +```python +def truthy(key, val=None): + return operatur.truth(k) +``` + +so such an expression is valid and True: + + + +```python +pc.State.update({'foo': 1, 'bar': 'a', 'baz': []}) +assert pc.pycond('[ foo and bar and not baz]')() == True +``` + +- When `not lookup_key` is given, then `co` is set to the `falsy` + function: + + + +```python +m = {'x': 'y', 'falsy_val': {}} +# normal way +assert pc.pycond(['foo', 'eq', None])(state=m) == True +# using "not" as prefix: +assert pc.pycond('not foo')(state=m) == True +assert pc.pycond(['not', 'foo'])(state=m) == True +assert pc.pycond('not falsy_val')(state=m) == True +assert pc.pycond('x and not foo')(state=m) == True +assert pc.pycond('y and not falsy_val')(state=m) == False +``` + + +#### Nesting Combined conditions may be arbitrarily nested using brackets "[" and "]". > Via the `brkts` config parameter you may change those to other separators at build time. -## Tokenizing Details +### Tokenizing Details > Brackets as strings in this flat list form, e.g. `['[', 'a', 'and' 'b', ']'...]` -### Functioning +#### Functioning The tokenizers job is to take apart expression strings for the builder. -### Separator `sep` +#### Separator `sep` Separates the different parts of an expression. Default is ' '. @@ -730,13 +775,15 @@ Bracket characters do not need to be separated, the tokenizer will do: ```python # equal: -assert pc.pycond('[[a eq 42] and b]')() == pc.pycond('[ [ a eq 42 ] and b ]')() +assert ( + pc.pycond('[[a eq 42] and b]')() == pc.pycond('[ [ a eq 42 ] and b ]')() +) ``` > The condition functions themselves do not evaluate equal - those > had been assembled two times. -### Apostrophes +#### Apostrophes By putting strings into Apostrophes you can tell the tokenizer to not further inspect them, e.g. for the seperator: @@ -748,7 +795,7 @@ assert pc.pycond('a eq "Hello World"')() == True ``` -### Escaping +#### Escaping Tell the tokenizer to not interpret the next character: @@ -760,9 +807,9 @@ assert pc.pycond('b eq Hello\ World')() == True ``` -### Building +### Building -### Autoconv: Casting of values into python simple types +#### Autoconv: Casting of values into python simple types Expression string values are automatically cast into bools and numbers via the public `pycond.py_type` function. 
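A hedged illustration of what that casting does (inferred from `py_type`'s return expression in `pycond.py`, not an excerpt of the shipped examples):

```python
import pycond as pc

# number-like strings become numbers, the literals true/false/None become
# their Python counterparts, anything else stays a plain string:
assert pc.py_type('42') == 42
assert pc.py_type('true') is True and pc.py_type('false') is False
assert pc.py_type('None') is None
assert pc.py_type('foo') == 'foo'
```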
@@ -780,7 +827,7 @@ assert pc.pycond('a eq 42', autoconv=False)() == True ``` -If you do not want to provide a custom lookup function (where you can do what you want) +If you do not want to provide a custom lookup function(where you can do what you want) but want to have looked up keys autoconverted then use: @@ -792,7 +839,7 @@ for id in '1', 1: ``` -## Context On Demand +## Context On Demand Often the conditions are in user space, applied on data streams under the developer's control only at development time. @@ -801,7 +848,7 @@ The end user might pick only a few keys from many offered within an API. pycond's `ctx_builder` allows to only calculate those keys at runtime, the user decided to base conditions upon: -At condition build time hand over a namespace for *all* functions which +At condition build time hand over a namespace for *all * functions which are available to build the ctx. `pycon` will return a context builder function for you, calling only those functions @@ -877,7 +924,7 @@ class ApiCtxFuncs: return 0 if sys.version_info[0] < 3: - # we don't think it is a good idea to make the getter API stateful: + # we don't think it is a good idea to make the getter API stateful ;-) p2m.convert_to_staticmethods(ApiCtxFuncs) f, nfos = pc.parse_cond(cond, ctx_provider=ApiCtxFuncs) @@ -909,23 +956,23 @@ Calculating cur_hour Calculating cur_q Calculating (expensive) delta_q Calculating dt_last_enforce -Calc.Time (delta_q was called twice): 0.2011 +Calc.Time (delta_q was called twice): 0.2006 ``` -# Lookup Providers +## Lookup Providers ContextBuilders are interesting but we can do better. -We still calculated values for keys which might (dependent on the data) be not needed in dead ends of a lazily evaluated condition. +We still calculated values for keys which might(dependent on the data) be not needed in dead ends of a lazily evaluated condition. Lets avoid calculating these values, remembering the [custom lookup function](#custom-lookup-and-value-passing) feature. This is where lookup providers come in, providing namespaces for functions to be called conditionally. -Pycond [treats the condition keys as function names][pycond.py#517] within that namespace and calls them, when needed. +Pycond [treats the condition keys as function names][pycond.py#614] within that namespace and calls them, when needed. -## Accepted Signatures +### Accepted Signatures Lookup provider functions may have the following signatures: @@ -959,7 +1006,7 @@ class F: # applied al def f4(*a, **kw): """ - Full variant (always when varargs are involved) + Full variant(always when varargs are involved) """ return a[3]['d'], 'foo' @@ -977,7 +1024,7 @@ f = pc.pycond( assert f(state={'a': 42, 'b': 43, 'c': 100, 'd': 'foo'}) == True ``` -## Parametrized Lookup Functions +### Parametrized Lookup Functions Via the 'params' parameter you may supply keyword args to lookup functions: @@ -987,17 +1034,19 @@ class F: def hello(k, v, cfg, data, count, **kw): return data['foo'] == count, 0 -m = pc.pycond([':hello'], lookup_provider=F, params={'hello': {'count': 2}})(state={'foo': 2}) +m = pc.pycond([':hello'], lookup_provider=F, params={'hello': {'count': 2}})( + state={'foo': 2} +) assert m == True ``` -## Namespace +### Namespace -- Lookup functions can be found in nested class hirarchies or dicts. 
Separator is colon (':') -- As shown above, if they are flat within a toplevel class or dict you should still prefix with ':', to get build time exception (MissingLookupFunction) when not present +- Lookup functions can be found in nested class hirarchies or dicts. Separator is colon(':') +- As shown above, if they are flat within a toplevel class or dict you should still prefix with ':', to get build time exception(MissingLookupFunction) when not present - You can switch that behaviour off per condition build as config arg, as shown below -- You can switch that behaviour off globally via `pc.prefixed_lookup_funcs = False` +- You can switch that behaviour off globally via `pc.prefixed_lookup_funcs=False` Warning: This is a breaking API change with pre-20200610 versions, where the prefix was not required to find functions in, back then, only flat namespaces. Use the global switch after import to get the old behaviour. @@ -1005,10 +1054,12 @@ Warning: This is a breaking API change with pre-20200610 versions, where the pre ```python class F: - def a(data): return data['foo'] + def a(data): + return data['foo'] class inner: - def b(data): return data['bar'] + def b(data): + return data['bar'] m = {'c': {'d': {'func': lambda data: data['baz']}}} @@ -1102,21 +1153,21 @@ The output demonstrates that we did not even call the value provider functions f NOTE: Instead of providing a class tree you may also provide a dict of functions as `lookup_provider_dict` argument, see `qualify` examples below. -## Caching +## Caching Note: Currently you cannot override these defaults. Drop an issue if you need to. - Builtin state lookups: Not cached -- Custom `lookup` functions: Not cached (you can implement caching within those functions) +- Custom `lookup` functions: Not cached(you can implement caching within those functions) - Lookup provider return values: Cached, i.e. called only once, per data set -- Named condition sets (see below): Cached +- Named condition sets(see below): Cached -## Extensions +## Extensions -We deliver a few lookup function [extensions][pycond.py#614] +We deliver a few lookup function [extensions][pycond.py#711] - for time checks -- for os.environ checks (re-evaluated at runtime) +- for os.environ checks(re-evaluated at runtime) @@ -1142,7 +1193,7 @@ assert f(state={'a': 1}) == True -# Named Conditions: Qualification +## Named Conditions: Qualification Instead of just delivering booleans, pycond can be used to determine a whole set of information about data declaratively, like so: @@ -1217,7 +1268,7 @@ Running {'thrd': ['k', 'or', ':first'], 'listed': [['foo'], ['c', 'eq', 'foo']], WARNING: For performance reasons there is no built in circular reference check. You'll run into python's built in recursion checker! 
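To make that warning concrete, here is a hypothetical (made up for illustration) pair of named conditions referencing each other via the `:name` style used above - evaluating it would recurse until Python raises `RecursionError`:

```python
import pycond as pc

conds = {
    'a': [':b', 'or', ['foo', 'eq', 1]],   # 'a' refers to 'b' ...
    'b': [':a', 'and', ['bar', 'eq', 1]],  # ... and 'b' refers back to 'a'
}
# pc.qualify(conds)({'foo': 1})  # don't: would hit Python's recursion limit
```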
-## Options +## Options - into: Put the matched named conditions into the original data - prefix: Work from a prefix nested in the root @@ -1239,7 +1290,9 @@ class F: def func(*a, **kw): return True, 0 -q = lambda d, **kw: pc.qualify(conds, lookup_provider=F, prefixed_lookup_funcs=False, **kw)(d) +q = lambda d, **kw: pc.qualify( + conds, lookup_provider=F, prefixed_lookup_funcs=False, **kw +)(d) m = q({'bar': 1}) assert m == {0: False, 1: True, 2: True, 3: True, 'n': True} @@ -1251,7 +1304,8 @@ assert m == { 'conds': {0: False, 1: True, 2: True, 3: True, 'n': True}, } -def msg(): return {'bar': 1, 'pl': {'a': 1}} +def msg(): + return {'bar': 1, 'pl': {'a': 1}} # add_cached == True -> it's put into the cond results: m = q(msg(), into='conds', add_cached=True) @@ -1288,7 +1342,7 @@ assert m == { -## Partial Evaluation +## Partial Evaluation If you either supply a key called 'root' OR supply it as argument to `qualify`, pycond will only evaluate named conditions required to calculate the root key: @@ -1337,9 +1391,9 @@ assert called == [{'a': 1}, {'b': 1}] This means pycond can be used as a lightweight declarative function dispatching framework. -# Streaming Data +## Streaming Data -Since version 20200601 and Python 3.x versions, pycond can deliver [ReactiveX](https://github.com/ReactiveX/RxPY) compliant stream operators. +Since version 20200601 and Python 3.x versions, pycond can deliver[ReactiveX](https://github.com/ReactiveX/RxPY) compliant stream operators. Lets first set up a test data stream, by defining a function `rx_setup` like so: @@ -1371,7 +1425,9 @@ def push_through(*test_pipe, items=4): # turns the ints into dicts: {'i': 1}, then {'i': 2} and so on: # (we start from 1, the first 0 we filter out) - stream = stream.pipe(rx.filter(lambda i: i > 0), rx.map(lambda i: {'i': i})) + stream = stream.pipe( + rx.filter(lambda i: i > 0), rx.map(lambda i: {'i': i}) + ) # defines the stream through the tested operators: test_pipe = test_pipe + (compl,) @@ -1407,7 +1463,7 @@ assert r == [{'i': 1}, {'i': 2}, {'i': 3}] -> test setup works. -## Filtering +### Filtering This is the most simple operation: A simple stream filter. @@ -1437,7 +1493,7 @@ Output: Full messages passed: [{'payload': {'i': 1}}, {'payload': {'i': 3}}, {'payload': {'i': 5}}, {'payload': {'i': 7}}] ``` -## Streaming Classification +### Streaming Classification Using named condition dicts we can classify data, i.e. tag it, in order to process subsequently: @@ -1472,9 +1528,9 @@ run(2) Normally the data has headers, so thats a good place to keep the classification tags. -### Selective Classification +### Selective Classification -We fall back to an alternative condition evaluation (which could be a function call) *only* when a previous condition evaluation returns something falsy - by providing a *root condition*. +We fall back to an alternative condition evaluation(which could be a function call) * only * when a previous condition evaluation returns something falsy - by providing a * root condition*. When it evaluated, possibly requiring evaluation of other conditions, we return: @@ -1498,7 +1554,7 @@ assert r == [ ] ``` -#### Treating of Booleans (Conditions, Not Names) +## Treating of Booleans (Conditions, Not Names) For the special case of booleans in a condition list we do not treat them as names. @@ -1514,16 +1570,16 @@ res = qs({'a': 1}) assert res == {1: True, 2: False} ``` -## Asyncronous Operations +## Asyncronous Operations WARNING: Early Version. Only for the gevent platform. 
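For orientation, a condensed sketch of the gevent scaffolding which `tests/test_rx.py` in this change set sets up around these operators (details such as the scheduler may differ in your environment):

```python
import pycond as pc
import gevent
from gevent import monkey
from rx.scheduler.eventloop import GEventScheduler

monkey.patch_all()            # gevent cooperative multitasking
GS = GEventScheduler(gevent)  # scheduler used for the async examples
```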
Selective classification allows to call condition functions only when other criteria are met. That makes it possible to read e.g. from a database only when data is really required - and not always, "just in case". -pycond allows to define, that blocking operations should be run *async* within the stream, possibly giving up order. +pycond allows to define, that blocking operations should be run * async* within the stream, possibly giving up order. -### Asyncronous Filter +### Asyncronous Filter First a simple filter, which gives up order but does not block: @@ -1561,13 +1617,13 @@ Output: ``` item 2: 0.011s item 3: 0.022s -item 4: 0.033s -item 5: 0.044s +item 4: 0.034s +item 5: 0.045s item 1: 0.049s <----- not in order, blocked -item 6: 0.055s -item 7: 0.066s -item 8: 0.077s -item 9: 0.089s +item 6: 0.057s +item 7: 0.068s +item 8: 0.079s +item 9: 0.090s ``` Finally asyncronous classification, i.e. evaluation of multiple conditions: @@ -1575,7 +1631,8 @@ Finally asyncronous classification, i.e. evaluation of multiple conditions: ```python -def _thn(msg, data): return print('thread:', cur_thread().name, msg, data) +def _thn(msg, data): + return print('thread:', cur_thread().name, msg, data) # push_through just runs a stream of {'i': } through a given operator: Rx, rx, push_through = rx_setup() @@ -1597,7 +1654,7 @@ conds = [ class F: """ Namespace for condition lookup functions. - You may also pass a dict (lookup_provider_dict) + You may also pass a dict(lookup_provider_dict) We provide the functions for 'odd' and 'blocking'. """ @@ -1662,18 +1719,18 @@ assert [t['i'] for t in errors] == [2, 5] Output: ``` -thread: Thread-10054 odd {'i': 1} -thread: Dummy-10056 blocking {'i': 1} -thread: Thread-10055 odd {'i': 2} -thread: Dummy-10058 blocking {'i': 2} -thread: Thread-10057 odd {'i': 3} -thread: Dummy-10060 blocking {'i': 3} -thread: Thread-10059 odd {'i': 4} -thread: Thread-10061 odd {'i': 5} -thread: Dummy-10063 blocking {'i': 5} -thread: Thread-10062 odd {'i': 6} -thread: Thread-10064 odd {'i': 7} -thread: Dummy-10066 blocking {'i': 7} +thread: Thread-55 odd {'i': 1} +thread: Dummy-57 blocking {'i': 1} +thread: Thread-56 odd {'i': 2} +thread: Dummy-59 blocking {'i': 2} +thread: Thread-58 odd {'i': 3} +thread: Dummy-61 blocking {'i': 3} +thread: Thread-60 odd {'i': 4} +thread: Thread-62 odd {'i': 5} +thread: Dummy-64 blocking {'i': 5} +thread: Thread-63 odd {'i': 6} +thread: Thread-65 odd {'i': 7} +thread: Dummy-67 blocking {'i': 7} ``` @@ -1683,6 +1740,5 @@ thread: Dummy-10066 blocking {'i': 7} -[pycond.py#195]: https://github.com/axiros/pycond/blob/f533b55599f417b02121caaf3d82facce4dc565c/pycond.py#L195 -[pycond.py#517]: https://github.com/axiros/pycond/blob/f533b55599f417b02121caaf3d82facce4dc565c/pycond.py#L517 -[pycond.py#614]: https://github.com/axiros/pycond/blob/f533b55599f417b02121caaf3d82facce4dc565c/pycond.py#L614 \ No newline at end of file +[pycond.py#614]: https://github.com/axiros/pycond/blob/54fea4c6b0cdc62bcf0b984214649344b1a77230/pycond.py#L614 +[pycond.py#711]: https://github.com/axiros/pycond/blob/54fea4c6b0cdc62bcf0b984214649344b1a77230/pycond.py#L711 \ No newline at end of file diff --git a/pycond.py b/pycond.py index 37550fe..23eece1 100644 --- a/pycond.py +++ b/pycond.py @@ -193,37 +193,141 @@ def ops_reset(): State = {} -def state_get_deep(key, val, cfg, state=State, deep='.', **kw): - """ - Hotspot! - """ - # FIXME: why split at runtime? 
- # key maybe already path tuple - or string with deep as seperator: - # Also the try to get list items when part is int can be checked at build time! - parts = key.split(deep) if _is(key, str) else list(key) - while parts: - part = parts.pop(0) - try: - state = state.get(part) - except AttributeError as ex: +class Getters: + def state_get_deep(key, val, cfg, state=State, deep='.', **kw): + """ + Hotspot! + """ + # FIXME: why split at runtime? + # key maybe already path tuple - or string with deep as seperator: + # Also the try to get list items when part is int can be checked at build time! + # parts = key.split(deep) if _is(key, str) else list(key) + parts = key.split(deep) if _is(key, str) else key + for part in parts: try: - i = int(part) - state = state[i] - except ValueError as ex: # i no list index, we try attrs: - state = getattr(state, part, None) - except IndexError as ex: - state = None - if not state: - break - return state, val + state = state.get(part) + except AttributeError as ex: + try: + i = int(part) + state = state[i] + except ValueError as ex: # i no list index, we try attrs: + state = getattr(state, part, None) + except IndexError as ex: + state = None + if not state: + break + return state, val + def state_get(key, val, cfg, state=State, **kw): + # a lookup function can modify key AND value, i.e. returns both: + if _is(key, tuple): + return state_get_deep(key, val, cfg, state, **kw) + else: + return state.get(key), val # default k, v access function + + def dbg_get(key, val, cfg, state=State, *a, **kw): + res = Getters.state_get(key, val, cfg, state, *a, **kw) + val = 'FALSES' if val == FALSES else val + out('Lookup:', key, val, '->', res[0]) + return res + + def _diginto(state, key, sep): + """Helper which is, on the first matching state structure, + delivering the keys we needed""" + g = () + parts = key.split(sep) if _is(key, str) else key + for part in parts: + try: + state = state.get(part) + g += (part,) + except AttributeError as ex: + try: + i = int(part) + state = state[i] + g += (i,) + except ValueError as ex: # i no list index, we try attrs: + state = getattr(state, part, None) + g += ((part, 0),) # attrgetter below + except IndexError as ex: + state = None + if not state: + return None, None + return state, g + + _get_deep2_cache = {} + + def get_deep2(key, val, cfg, state, deep='.', _c=_get_deep2_cache, **kw): + """ + Here we cache the split result + """ + funcs = _c.get(key) + if funcs: + try: + if funcs[0] == True: + for f in funcs[1:]: + state = state[f] + else: + for f in funcs: + state = f(state) # we have itemgetters and attrsgettr + return state, val + except Exception: + return None, val + + state, g = Getters._diginto(state, key, sep=deep) + if state is None: + return state, val + + # we have matching structure => invest in assembling the getter functions + # If there are no getattr (=>no tuple), then just []-ing the values is even faster then using itemgetter: + # so we mark those with a True in the beginning and just remember the values: + if len(_c) > 1000000: + _c.clear() # safety belt + if not any([i for i in g if _is(i, tuple)]): + _c[key] = g = list(g) + g.insert(0, True) + else: + a, i = operator.attrgetter, operator.itemgetter + _c[key] = [a(f[0]) if isinstance(f, tuple) else i(f) for f in g] + return state, val + + _get_deep3_cache = {} + + def get_deep_evl(key, val, cfg, state, deep='.', _c=_get_deep3_cache, **kw): + """ + Fastest, but we must disallow ( ) + """ + funcs = _c.get(key) + if funcs: + try: + return funcs(state), val + 
except Exception: + return None, val + state, g = Getters._diginto(state, key, sep=deep) + if state is None: + return state, val + + g = [ + f'["{p}"]' if _is(p, str) else f'.{p[0]}' if _is(p, tuple) else f'[{p}]' + for p in g + ] + g = ''.join(g) + g = f'lambda s: s{g}' + if '(' in g and ')' in g: + g = 'lambda s: None' + if len(_c) > 1000000: + _c.clear() # safety belt + _c[key] = eval(g) + return state, val + + +def clear_caches(): + Getters._get_deep2_cache.clear() + Getters._get_deep3_cache.clear() -def state_get(key, val, cfg, state=State, **kw): - # a lookup function can modify key AND value, i.e. returns both: - if _is(key, tuple): - return state_get_deep(key, val, cfg, state, **kw) - else: - return state.get(key), val # default k, v access function + +state_get_deep = Getters.state_get_deep +dbg_get = Getters.dbg_get +state_get = Getters.state_get # if val in these we deliver False: @@ -232,13 +336,6 @@ def state_get(key, val, cfg, state=State, **kw): FALSES = (None, False, '', 0, {}, [], ()) -def dbg_get(key, val, cfg, state=State, *a, **kw): - res = state_get(key, val, cfg, state, *a, **kw) - val = 'FALSES' if val == FALSES else val - out('Lookup:', key, val, '->', res[0]) - return res - - def out(*m): return print(' '.join([str(s) for s in m])) @@ -677,6 +774,10 @@ def parse_cond(cond, lookup=state_get, **cfg): if cfg.get('deep'): lookup = partial(state_get_deep, deep=cfg['deep']) + elif cfg.get('deep2'): + lookup = partial(Getters.get_deep2, deep=cfg['deep2']) + elif cfg.get('deep3'): + lookup = partial(Getters.get_deep_evl, deep=cfg['deep3']) cfg['lookup'] = lookup cfg['lookup_args'] = sig_args(lookup) @@ -764,7 +865,9 @@ def _(v): except: pass - return True if v == 'true' else False if v == 'false' else None if v == 'None' else _(v) + return ( + True if v == 'true' else False if v == 'false' else None if v == 'None' else _(v) + ) # ---------------------- Following Code Only for Parsing STRING Conditions Into Structs @@ -1035,7 +1138,10 @@ def run_conds( for k, v in conds: b = built[k] if _is(v, list): - r[k] = [run_conds(data, c, b[i], is_single, **kw) for i, c in zip(range(len(v)), v)] + r[k] = [ + run_conds(data, c, b[i], is_single, **kw) + for i, c in zip(range(len(v)), v) + ] else: r[k] = m = run_conds(data, v, b, is_single, **kw) if m: @@ -1262,3 +1368,4 @@ def on_next(x, run_item=run_item): return Rx.merge(Rx.create(subscribe), subj_async_results) return _run + return _run diff --git a/pyproject.toml b/pyproject.toml index 74be660..806ed1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.ruff] -line-length = 110 +line-length = 90 ignore = ["E501", "E202"] [tool.blue] -line-length = 110 +line-length = 90 diff --git a/tests/test_getter_perf.py b/tests/test_getter_perf.py new file mode 100644 index 0000000..d984b12 --- /dev/null +++ b/tests/test_getter_perf.py @@ -0,0 +1,40 @@ +import time +from pycond import parse_cond + +_is = isinstance + +now = time.time + + +def test_comp_perf(): + """6 times faster deep lookups when preassembled + at first match + """ + now = time.time + s = {'a': {'b': [{'c': 42}]}} + + class t: + a = s + + # S = {'A': {'a': s}} + S = {'A': t} + + count = 100000 + + def run(count=count, **kw): + pc = parse_cond('A.a.a.b.0.c eq 42', **kw)[0] + t0 = now() + for i in range(count): + assert pc(state=S) + return now() - t0 + + dt1 = run(deep='.') + dt2 = run(deep2='.') + dt3 = run(deep3='.') + print('Cached item getter perf vs get_deep:', dt1 / dt2) + print('Eval perf vs get_deep:', dt1 / dt3) + assert 2 * dt2 < dt1 + + +if 
__name__ == '__main__': + test_comp_perf() diff --git a/tests/test_perf.py b/tests/test_perf.py index 96302b4..2f58cce 100644 --- a/tests/test_perf.py +++ b/tests/test_perf.py @@ -1,16 +1,21 @@ -''' +""" Abusing the travis environment param. possiblities to compare a few perf. statements... E.g. Did they change 'is' vs '==' in Py2 vs Py3 ? And Is there a significant effect? -''' -print('\n\nSome Performance Tests\n\n') -from time import time -from timeit import timeit, Timer -import unittest -from functools import partial -from collections import OrderedDict as OD +""" + +import time import sys +from collections import OrderedDict as OD +from functools import partial +import unittest +from operator import itemgetter +from timeit import timeit, Timer +from time import time + +print('\n\nSome Performance Tests\n\n') + PY2 = sys.version_info[0] == 2 range = xrange if PY2 else range @@ -18,14 +23,15 @@ number = 10 repeat = 10 R = [] -class T(unittest.TestCase): + +class T(unittest.TestCase): def init(self, k1, k2, setup=None): print('') self.res = OD({k1: 0}) self.res[k2] = 0 print('\n%s\n' % ('- ' * 40)) - print ('Test: "%s" vs "%s"' % tuple(self.res.keys())) + print('Test: "%s" vs "%s"' % tuple(self.res.keys())) print('\n%s\n' % ('- ' * 40)) if setup: print('setup: %s' % setup) @@ -41,7 +47,7 @@ def tearDown(self): if q > 1: k1, k2 = k2, k1 q = vs[1] / vs[0] - msg = ('=> "%s" is better than "%s" by:' % (k1, k2)) + msg = '=> "%s" is better than "%s" by:' % (k1, k2) print(msg) print('\n>>>> %.2f <<<<<' % q) R.append((q, msg)) @@ -67,14 +73,13 @@ def sum(a, b): return a + b assert lf(42) == pf(42) == 52 """ m = self.init(k1, k2, setup) - self.run_(k1, "for i in range(1000): lf(i)", setup) - self.run_(k2, "for i in range(1000): pf(i)", setup) - + self.run_(k1, 'for i in range(1000): lf(i)', setup) + self.run_(k2, 'for i in range(1000): pf(i)', setup) def test_if_vs_if_is_None(self): - '''if foo:... must check for all kinds of truthyness. + """if foo:... must check for all kinds of truthyness. => is explictly saying e.g. if foo == None better? 
- ''' + """ k1 = 'if' k2 = 'if is None' @@ -84,9 +89,8 @@ def test_if_vs_if_is_None(self): self.run_(k1, e % '', number=nr) self.run_(k2, e % 'is None', number=nr) - def test_eq_vs_is(self): - '''if foo is None vs if foo == None''' + """if foo is None vs if foo == None""" k1 = 'is None' k2 = '== None' m = self.init(k1, k2) @@ -95,8 +99,6 @@ def test_eq_vs_is(self): self.run_(k1, e % '== None', number=nr) self.run_(k2, e % 'is None', number=nr) - - def test_local_scope(self): k1 = 'global lookup' k2 = 'local scope via function default' @@ -113,7 +115,6 @@ def locl(a, b=A): return b + a self.run_(k2, e % 'locl', setup=setup, number=1000) - if __name__ == '__main__': # tests/test_pycond.py PyCon.test_auto_brackets try: @@ -121,4 +122,4 @@ def locl(a, b=A): return b + a except: print('\nAll Results Again\n') for q, msg in R: - print ('%s\n%.2f' % (msg, q)) + print('%s\n%.2f' % (msg, q)) diff --git a/tests/test_pycond.py b/tests/test_pycond.py index eb05e58..ac1fb30 100755 --- a/tests/test_pycond.py +++ b/tests/test_pycond.py @@ -1,6 +1,12 @@ #!/usr/bin/env python -tt -import unittest, sys, os +import pycond as pc +from pycond import run_all_ops_thru +from pycond import parse_cond, State as S +from pycond import pycond, state_get, dbg_get, OPS, COMB_OPS +import unittest +import sys +import os import operator import time @@ -8,13 +14,12 @@ pth = d(d(os.path.abspath(__file__))) sys.path.insert(0, pth) -from pycond import pycond, state_get, dbg_get, OPS, COMB_OPS -from pycond import parse_cond, State as S -from pycond import run_all_ops_thru keys = [] -eq = lambda _, k, v: _.assertEqual(k, v) + +def eq(_, k, v): + return _.assertEqual(k, v) def parse(cond, *a, **kw): @@ -163,6 +168,30 @@ def my_lu(k, v, req, user, model=model): assert f(req=req, user='joe') == True assert f(req=req, user='foo') == False + def test_any_in_list(s): + # https://github.com/axiros/pycond/issues/3 + def foo(a, b): + return b in a if isinstance(a, set) else a == b + + eq = pc.OPS['eq'] + pc.OPS['eq'] = foo + + def lu(k, v, state): + for part in k.split('.'): + if isinstance(state, list): + state = set({i.get(part) for i in state}) + else: + state = state.get(part) + return state, v + + data = [{'x': [{'a': 1}, {'a': 2}]}, {'x': [{'a': 1}, {'a': 3}]}] + expr = 'x.a eq 2' + f = pc.make_filter(expr, lookup=lu) + filtered = list(filter(f, data)) + assert filtered == [{'x': [{'a': 1}, {'a': 2}]}] + + pc.OPS['eq'] = eq + def test_custom_sep(s, cond='[[foo.eq.b ar]and.not.bar.eq.foo]'): S['foo'] = 'b ar' eq(s, parse(cond, sep='.')[0](), True) @@ -216,7 +245,8 @@ def test_all(s, cond='k1 %s k2'): eq(s, parse(cnd_under)[0](), exp) -val_splitting_get = lambda k, v: (S.get(k), v.split(',')) +def val_splitting_get(k, v): + return (S.get(k), v.split(',')) class TestComparisonOps(T): @@ -462,9 +492,6 @@ def fast_lu(k, v): assert dt1 / dt2 < 8, 'Expected max 8 times slower, is %s' % (dt1 / dt2) -import pycond as pc - - class TokenizerToStruct(T): def test_get_struct1(s): c = '[foo eq bar and [ baz eq foo]]' diff --git a/tests/test_rx.py b/tests/test_rx.py index a6a0867..1898c45 100644 --- a/tests/test_rx.py +++ b/tests/test_rx.py @@ -1,17 +1,20 @@ # we use gevent +import pycond as pc +import gevent +from threading import Event, current_thread as ct +import rx.scheduler.eventloop as e +from rx.scheduler.eventloop import GEventScheduler from gevent import monkey -import time, sys +import time +import sys monkey.patch_all() -import gevent, pycond as pc -from rx.scheduler.eventloop import GEventScheduler -import rx.scheduler.eventloop as e 
- -from threading import Event, current_thread as ct # _thn = lambda msg, data: print('thread:', cur_thread().name, msg, data) -tn = lambda: ct().name.replace('Thread', '') +def tn(): return ct().name.replace('Thread', '') + + GS = GEventScheduler(gevent) # GS = None @@ -65,10 +68,8 @@ def d(i): t = [l[0]['payload'].pop(b) for b in ['blocking', 'blocking2']] assert t[0] == t[1] assert 'Dummy' in t[0] - assert l == [ - {'mod': {'b2': True, 'b1': True, 'root': True}, 'payload': {'a': 1}} - ] + assert l == [{'mod': {'b2': True, 'b1': True, 'root': True}, 'payload': {'a': 1}}] if __name__ == '__main__': - Tests().test_perf_compare() + Tests().test_rx_async1_prefix() diff --git a/tests/test_tutorial.py b/tests/test_tutorial.py index 24d94be..68dd76a 100644 --- a/tests/test_tutorial.py +++ b/tests/test_tutorial.py @@ -42,6 +42,7 @@ def test_mechanics(self): ## Parsing pycond parses the condition expressions according to a set of constraints given to the parser in the `tokenizer` function. + The result of the tokenizer is given to the builder. """ @@ -52,7 +53,6 @@ def f0(): expr = '[a eq b and [c lt 42 or foo eq bar]]' cond = pc.to_struct(pc.tokenize(expr, sep=' ', brkts='[]')) print('filter:', cond) - # test: data = [ {'a': 'b', 'c': 1, 'foo': 42}, @@ -68,9 +68,9 @@ def f0(): ## Building - After parsing the builder is assembling a nested set of operator functions, + After parsing, the builder is assembling a nested set of operator functions, combined via combining operators. The functions are partials, i.e. not yet - evaluated but information about the necessary keys is already available: + evaluated - but information about the necessary keys is already available: """ @@ -83,7 +83,7 @@ def f1(): """ - Note that the `make_filter` function is actually a convencience function for + Note: The `make_filter` function is actually a convencience function for `parse_cond`, ignoring that meta information and calling with `state=` @@ -105,15 +105,17 @@ def f1_1(): """ - - # Evaluation - The result of the builder is a 'pycondition', which can be run many times against varying state of the system. + ## Evaluation + + The result of the builder is a 'pycondition', i.e. a function which can be run many times against varying state of the system. How state is evaluated is customizable at build and run time. ## Default Lookup - The default is to get lookup keys within expressions from an initially empty `State` dict within the module - which is *not* thread safe, i.e. not to be used in async or non cooperative multitasking environments. + "Lookup" denotes the process of deriving the actual values to evaluate, from a given state. Can be simple gets, getattrs, walks into the structure - or arbitrary, via custom lookup functions. + + The default is to *get* lookup keys within expressions from an initially empty `State` dict within the module. This is *not* thread safe, i.e. not to be used in async or non cooperative multitasking environments. 
""" @@ -129,7 +131,7 @@ def f2(): ## Passing State - Use the state argument at evaluation: + Using a state argument at evaluation *is* thread safe: """ def f2_1(): @@ -137,7 +139,7 @@ def f2_1(): assert pc.pycond('a gt 2')(state={'a': -2}) == False """ - ### Deep Lookup / Nested State / Lists + ## Deep Lookup / Nested State / Lists You may supply a path seperator for diving into nested structures like so: """ @@ -159,9 +161,56 @@ def f2_2(): f, nfos = pc.parse_cond(c) # sorting order for keys: tuples at end, sorted by len, rest default py sorted: assert f(state=m) == True and nfos['keys'] == ['a', ('a', 'b', 0, 'c')] - print(nfos) """ + + - The structure may also contain objects, then we use getattribute to get to the next value. + + - `deep="."` is actually just convience notation for supplying the following "lookup function" (see below): + + """ + + def f2_20(): + m = {'a': {'b': [{'c': 1}]}} + assert pc.pycond('a.b.0.c', lookup=pc.state_get_deep)(state=m) == True + + """ + + ### Lookup Performance: Prebuilt Deep Getters + + The value lookup within nested structures can be stored into item and attribute getters (or , alternatively, an evaluated synthesized lookup function), built, when the first item has a matching structure. + + - Upside: [Performance](./test/test_getter_perf.py) is a few times better compared to when the structure of items is explored each time, as with the 'deep' parameter. + - Downside: The lookup remains as built for the first structurely matching item. Schematic changes like from a key within a dict to an attribute will not except but deliver always False for the + actual condition value matching. + + - `pycond.Getters.state_get_deep2`: A list of item and attribute getters is built at first successfull lookup evaluation. + - `pycond.Getters.state_get_evl`: An expression like "lambda state=state['a'].b[0]['c']" is built and evaluated, then applied to the items. + - Fastest way to get to the values at evaluation time. + - Security: Round brackets within key names are forbidden and deliver always false - but an eval is an eval i.e. potentially evil. + + These two additional "deep" lookup functions are conveniently made accessible by supplying a `deep2` or `deep3` argument: + + """ + + def f2_201(): + m = {'a': {'b': [{'c': 1}]}} + # 3 times faster than deep. Safe. + assert pc.pycond('a.b.0.c', deep2='.')(state=m) == True + # 4 times faster than deep. Eval involved. + assert pc.pycond('a.b.0.c', deep3='.')(state=m) == True + + """ + The evaluation results for the keys are cached. The cache is cleared after 1Mio entries but can be cleared manually via `pc.clear_caches()` any time before that. + + ### Best Practices + + - Lookup keys change all the time, not many items checked for specific key: Use `deep` + - Many items to be checked with same keys, input from untrusted users: Use `deep2` + - Many items to be checked with same keys, input from trusted users: Use `deep3` + + + ## Prefixed Data When data is passed through processing pipelines, it often is passed with headers. So it may be useful to pass a global prefix to access the payload like so: @@ -188,9 +237,6 @@ class MyObj: assert pc.pycond(cond, deep='.', prefix='payload')(state=m) == True """ - Perf Tip: When you have deep nested class or object hirarchies, then a custom lookup - function will be faster than pycond's default lookup, which splits the key into parts, - then works its way in via getitem, getattr, from root. 
## Custom Lookup And Value Passing @@ -217,7 +263,7 @@ def my_lu(k, v, req, user, model=model): """ > as you can see in the example, the state parameter is just a convention - for `pyconds'` [title: default lookup function,fmatch:pycond.py,lmatch:def state_get]. + for `pyconds'` [title: default lookup function, fmatch:pycond.py, lmatch:def state_get] < SRC > . ## Lazy Evaluation @@ -256,89 +302,11 @@ def myget(key, val, cfg, state=None, **kw): print(evaluated) """ - Remember that all keys occurring in a condition (which may be provided by the user at runtime) are returned by the condition parser. Means that building of evaluation contexts [can be done](#context-on-demand-and-lazy-evaluation), based on the data actually needed and not more. - - # Details - - ## Debugging Lookups - - pycond provides a key getter which prints out every lookup. - """ - - def f3_2(): - f = pc.pycond('[[a eq b] or foo eq bar] or [baz eq bar]', lookup=pc.dbg_get) - assert f(state={'foo': 'bar'}) == True - - """ - ## Enabling/Disabling of Branches - - Insert booleans like shown: - """ - - def f3_21(): - f = pc.pycond(['foo', 'and', ['bar', 'eq', 1]]) - assert f(state={'foo': 1}) == False - f = pc.pycond(['foo', 'and', [True, 'or', ['bar', 'eq', 1]]]) - assert f(state={'foo': 1}) == True - - """ - ## Building Conditions From Text - - Condition functions are created internally from structured expressions - - but those are [hard to type](#lazy-dynamic-context-assembly), - involving many apostropies. - - The text based condition syntax is intended for situations when end users - type them into text boxes directly. - - ### Grammar - - Combine atomic conditions with boolean operators and nesting brackets like: - - ``` - [ ] [ [ .... - ``` - - ### Atomic Conditions - - ``` - [not] [ [rev] [not] ] - ``` - - When just `lookup_key` is given, then `co` is set to the `truthy` function: - ```python - def truthy(key, val=None): - return operatur.truth(k) - ``` - - so such an expression is valid and True: - - """ - - def f4(): - pc.State.update({'foo': 1, 'bar': 'a', 'baz': []}) - assert pc.pycond('[ foo and bar and not baz]')() == True - - """ - - When `not lookup_key` is given, then `co` is set to the `falsy` - function: - - """ - - def f4_11(): - m = {'x': 'y', 'falsy_val': {}} - # normal way - assert pc.pycond(['foo', 'eq', None])(state=m) == True - # using "not" as prefix: - assert pc.pycond('not foo')(state=m) == True - assert pc.pycond(['not', 'foo'])(state=m) == True - assert pc.pycond('not falsy_val')(state=m) == True - assert pc.pycond('x and not foo')(state=m) == True - assert pc.pycond('y and not falsy_val')(state=m) == False + Remember that all keys occurring in a condition(which may be provided by the user at runtime) are returned by the condition parser. Means that building of evaluation contexts[can be done]( # context-on-demand-and-lazy-evaluation), based on the data actually needed and not more. - """ - ## Condition Operators + ## Condition Operators (Comparators) - All boolean [standardlib operators](https://docs.python.org/2/library/operator.html) + All boolean[standardlib operators](https://docs.python.org/2/library/operator.html) are available by default: """ @@ -422,6 +390,7 @@ def f7(): ### Wrapping Condition Operators #### Global Wrapping + You may globally wrap all evaluation time condition operations through a custom function: """ @@ -447,7 +416,7 @@ def hk(f_op, a, b, l=l): You may compose such wrappers via repeated application of the `run_all_ops_thru` API function. 
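A minimal sketch of such a composition (the wrapper names are made up; repeated application presumably wraps the operators as they currently are, so the second wrapper runs on top of the first):

```python
import pycond as pc

trace, counts = [], []

def log_wrap(f_op, a, b):
    trace.append((a, b))      # record the compared values
    return f_op(a, b)

def count_wrap(f_op, a, b):
    counts.append(1)          # count operator invocations
    return f_op(a, b)

pc.run_all_ops_thru(log_wrap)    # first wrapper
pc.run_all_ops_thru(count_wrap)  # applied again: composes with the first one

pc.State.update({'a': 1})
assert pc.pycond('a eq 1')() == True
assert trace and counts
pc.ops_use_symbolic_and_txt()    # reset the operator table, as the global wrapping example does
```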
- #### Condition Local Wrapping + ### Condition Local Wrapping This is done through the `ops_thru` parameter as shown: @@ -468,7 +437,7 @@ def myhk(f_op, a, b): > Using `ops_thru` is a good way to debug unexpected results, since you > can add breakpoints or loggers there. - ## Combining Operations + ### Combining Operations You can combine single conditions with @@ -482,21 +451,100 @@ def myhk(f_op, a, b): > Do not use spaces for the names of combining operators. The user may use them but they are replaced at before tokenizing time, like `and not` -> `and_not`. - ### Nesting + ## Details + + ### Debugging Lookups + + pycond provides a key getter which prints out every lookup. + """ + + def f3_2(): + f = pc.pycond('[[a eq b] or foo eq bar] or [baz eq bar]', lookup=pc.dbg_get) + assert f(state={'foo': 'bar'}) == True + + """ + ### Enabling/Disabling of Branches + + Insert booleans like shown: + """ + + def f3_21(): + f = pc.pycond(['foo', 'and', ['bar', 'eq', 1]]) + assert f(state={'foo': 1}) == False + f = pc.pycond(['foo', 'and', [True, 'or', ['bar', 'eq', 1]]]) + assert f(state={'foo': 1}) == True + + """ + ### Building Conditions From Text + + Condition functions are created internally from structured expressions - + but those are[hard to type]( # lazy-dynamic-context-assembly), + involving many apostropies. + + The text based condition syntax is intended for situations when end users + type them into text boxes directly. + + #### Grammar + + Combine atomic conditions with boolean operators and nesting brackets like: + + ``` + [< atom1 > < and | or | and not|... > ] < and|or... > [ [ < atom3 > .... + ``` + + #### Atomic Conditions + + ``` + [not] < lookup_key > [[rev] [not] < condition operator (co) > ] + ``` + - When just `lookup_key` is given, then `co` is set to the `truthy` function: + ```python + def truthy(key, val=None): + return operatur.truth(k) + ``` + + so such an expression is valid and True: + + """ + + def f4(): + pc.State.update({'foo': 1, 'bar': 'a', 'baz': []}) + assert pc.pycond('[ foo and bar and not baz]')() == True + + """ + - When `not lookup_key` is given, then `co` is set to the `falsy` + function: + + """ + + def f4_11(): + m = {'x': 'y', 'falsy_val': {}} + # normal way + assert pc.pycond(['foo', 'eq', None])(state=m) == True + # using "not" as prefix: + assert pc.pycond('not foo')(state=m) == True + assert pc.pycond(['not', 'foo'])(state=m) == True + assert pc.pycond('not falsy_val')(state=m) == True + assert pc.pycond('x and not foo')(state=m) == True + assert pc.pycond('y and not falsy_val')(state=m) == False + + """ + + #### Nesting Combined conditions may be arbitrarily nested using brackets "[" and "]". > Via the `brkts` config parameter you may change those to other separators at build time. - ## Tokenizing Details + ### Tokenizing Details > Brackets as strings in this flat list form, e.g. `['[', 'a', 'and' 'b', ']'...]` - ### Functioning + #### Functioning The tokenizers job is to take apart expression strings for the builder. - ### Separator `sep` + #### Separator `sep` Separates the different parts of an expression. Default is ' '. @@ -515,13 +563,15 @@ def f9_1(): def f10(): # equal: - assert pc.pycond('[[a eq 42] and b]')() == pc.pycond('[ [ a eq 42 ] and b ]')() + assert ( + pc.pycond('[[a eq 42] and b]')() == pc.pycond('[ [ a eq 42 ] and b ]')() + ) """ > The condition functions themselves do not evaluate equal - those > had been assembled two times. 
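In other words (a tiny sketch, not part of the original tutorial):

```python
import pycond as pc

f1 = pc.pycond('[[a eq 42] and b]')
f2 = pc.pycond('[ [ a eq 42 ] and b ]')
assert f1 is not f2     # assembled twice -> two distinct function objects
assert f1() == f2()     # but they evaluate to the same result
```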
- ### Apostrophes + #### Apostrophes By putting strings into Apostrophes you can tell the tokenizer to not further inspect them, e.g. for the seperator: @@ -533,7 +583,7 @@ def f11(): """ - ### Escaping + #### Escaping Tell the tokenizer to not interpret the next character: @@ -547,7 +597,7 @@ def f12(): ### Building - ### Autoconv: Casting of values into python simple types + #### Autoconv: Casting of values into python simple types Expression string values are automatically cast into bools and numbers via the public `pycond.py_type` function. @@ -565,7 +615,7 @@ def f13(): """ - If you do not want to provide a custom lookup function (where you can do what you want) + If you do not want to provide a custom lookup function(where you can do what you want) but want to have looked up keys autoconverted then use: """ @@ -586,7 +636,7 @@ def f14(): pycond's `ctx_builder` allows to only calculate those keys at runtime, the user decided to base conditions upon: - At condition build time hand over a namespace for *all* functions which + At condition build time hand over a namespace for *all * functions which are available to build the ctx. `pycon` will return a context builder function for you, calling only those functions @@ -662,7 +712,7 @@ def clients(ctx): return 0 if sys.version_info[0] < 3: - # we don't think it is a good idea to make the getter API stateful: + # we don't think it is a good idea to make the getter API stateful ;-) p2m.convert_to_staticmethods(ApiCtxFuncs) f, nfos = pc.parse_cond(cond, ctx_provider=ApiCtxFuncs) @@ -684,19 +734,19 @@ def clients(ctx): cond, ApiCtxFuncs = f15_1() """ - # Lookup Providers + ## Lookup Providers ContextBuilders are interesting but we can do better. - We still calculated values for keys which might (dependent on the data) be not needed in dead ends of a lazily evaluated condition. + We still calculated values for keys which might(dependent on the data) be not needed in dead ends of a lazily evaluated condition. Lets avoid calculating these values, remembering the [custom lookup function](#custom-lookup-and-value-passing) feature. This is where lookup providers come in, providing namespaces for functions to be called conditionally. - Pycond [title: treats the condition keys as function names,fmatch:pycond.py,lmatch:def f_from_lookup_provider] within that namespace and calls them, when needed. + Pycond [title:treats the condition keys as function names, fmatch:pycond.py, lmatch:def f_from_lookup_provider] within that namespace and calls them, when needed. - ## Accepted Signatures + ### Accepted Signatures Lookup provider functions may have the following signatures: @@ -730,7 +780,7 @@ def f3(key, val, cfg, data, **kw): # applied al def f4(*a, **kw): """ - Full variant (always when varargs are involved) + Full variant(always when varargs are involved) """ return a[3]['d'], 'foo' @@ -748,7 +798,7 @@ def f4(*a, **kw): assert f(state={'a': 42, 'b': 43, 'c': 100, 'd': 'foo'}) == True """ - ## Parametrized Lookup Functions + ### Parametrized Lookup Functions Via the 'params' parameter you may supply keyword args to lookup functions: """ @@ -758,28 +808,32 @@ class F: def hello(k, v, cfg, data, count, **kw): return data['foo'] == count, 0 - m = pc.pycond([':hello'], lookup_provider=F, params={'hello': {'count': 2}})(state={'foo': 2}) + m = pc.pycond([':hello'], lookup_provider=F, params={'hello': {'count': 2}})( + state={'foo': 2} + ) assert m == True """ - ## Namespace + ### Namespace - - Lookup functions can be found in nested class hirarchies or dicts. 
Separator is colon (':') - - As shown above, if they are flat within a toplevel class or dict you should still prefix with ':', to get build time exception (MissingLookupFunction) when not present + - Lookup functions can be found in nested class hirarchies or dicts. Separator is colon(':') + - As shown above, if they are flat within a toplevel class or dict you should still prefix with ':', to get build time exception(MissingLookupFunction) when not present - You can switch that behaviour off per condition build as config arg, as shown below - - You can switch that behaviour off globally via `pc.prefixed_lookup_funcs = False` + - You can switch that behaviour off globally via `pc.prefixed_lookup_funcs=False` Warning: This is a breaking API change with pre-20200610 versions, where the prefix was not required to find functions in, back then, only flat namespaces. Use the global switch after import to get the old behaviour. - + """ def f15_15(): class F: - def a(data): return data['foo'] + def a(data): + return data['foo'] class inner: - def b(data): return data['bar'] + def b(data): + return data['bar'] m = {'c': {'d': {'func': lambda data: data['baz']}}} @@ -865,16 +919,16 @@ def f15_2(): Note: Currently you cannot override these defaults. Drop an issue if you need to. - Builtin state lookups: Not cached - - Custom `lookup` functions: Not cached (you can implement caching within those functions) + - Custom `lookup` functions: Not cached(you can implement caching within those functions) - Lookup provider return values: Cached, i.e. called only once, per data set - - Named condition sets (see below): Cached + - Named condition sets(see below): Cached ## Extensions - We deliver a few lookup function [title: extensions,fmatch:pycond.py,lmatch:class Extensions] + We deliver a few lookup function [title:extensions, fmatch:pycond.py, lmatch:class Extensions] - for time checks - - for os.environ checks (re-evaluated at runtime) + - for os.environ checks(re-evaluated at runtime) """ @@ -900,7 +954,7 @@ def f16_1(): """ - # Named Conditions: Qualification + ## Named Conditions: Qualification Instead of just delivering booleans, pycond can be used to determine a whole set of information about data declaratively, like so: @@ -992,7 +1046,9 @@ class F: def func(*a, **kw): return True, 0 - q = lambda d, **kw: pc.qualify(conds, lookup_provider=F, prefixed_lookup_funcs=False, **kw)(d) + q = lambda d, **kw: pc.qualify( + conds, lookup_provider=F, prefixed_lookup_funcs=False, **kw + )(d) m = q({'bar': 1}) assert m == {0: False, 1: True, 2: True, 3: True, 'n': True} @@ -1004,7 +1060,8 @@ def func(*a, **kw): 'conds': {0: False, 1: True, 2: True, 3: True, 'n': True}, } - def msg(): return {'bar': 1, 'pl': {'a': 1}} + def msg(): + return {'bar': 1, 'pl': {'a': 1}} # add_cached == True -> it's put into the cond results: m = q(msg(), into='conds', add_cached=True) @@ -1092,9 +1149,9 @@ def xx(k, v, cfg, data, **kw): """ """ - # Streaming Data + ## Streaming Data - Since version 20200601 and Python 3.x versions, pycond can deliver [ReactiveX](https://github.com/ReactiveX/RxPY) compliant stream operators. + Since version 20200601 and Python 3.x versions, pycond can deliver[ReactiveX](https://github.com/ReactiveX/RxPY) compliant stream operators. 
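    Before moving on to the stream examples, the caching rules listed above can be observed with a small sketch. The provider class `P`, the `expensive` function and the call counter are purely illustrative; per the rules above we expect the provider to be hit only once per data set, even though two named conditions reference it:

    ```python
    import pycond as pc

    calls = []

    class P:
        def expensive(k, v, cfg, data, **kw):
            # Illustrative provider function: record every invocation and
            # return (value from data, value to compare against).
            calls.append(k)
            return data.get('answer'), v

    conds = {
        'is_42': [':expensive', 'eq', 42],
        'is_pos': [':expensive', 'gt', 0],
    }
    q = pc.qualify(conds, lookup_provider=P)
    print(q({'answer': 42}))  # expected: {'is_42': True, 'is_pos': True}
    print(len(calls))         # expected: 1 - cached within the data set
    ```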
Lets first set up a test data stream, by defining a function `rx_setup` like so: @@ -1126,7 +1183,9 @@ def err(*a): # turns the ints into dicts: {'i': 1}, then {'i': 2} and so on: # (we start from 1, the first 0 we filter out) - stream = stream.pipe(rx.filter(lambda i: i > 0), rx.map(lambda i: {'i': i})) + stream = stream.pipe( + rx.filter(lambda i: i > 0), rx.map(lambda i: {'i': i}) + ) # defines the stream through the tested operators: test_pipe = test_pipe + (compl,) @@ -1162,7 +1221,7 @@ def rx_1(): """ -> test setup works. - ## Filtering + ### Filtering This is the most simple operation: A simple stream filter. @@ -1187,7 +1246,7 @@ def rx_filter(): assert r == odds """ - ## Streaming Classification + ### Streaming Classification Using named condition dicts we can classify data, i.e. tag it, in order to process subsequently: @@ -1224,7 +1283,7 @@ def run(offs=0): ### Selective Classification - We fall back to an alternative condition evaluation (which could be a function call) *only* when a previous condition evaluation returns something falsy - by providing a *root condition*. + We fall back to an alternative condition evaluation(which could be a function call) * only * when a previous condition evaluation returns something falsy - by providing a * root condition*. When it evaluated, possibly requiring evaluation of other conditions, we return: """ @@ -1248,7 +1307,7 @@ def rx_class_sel(): ] """ - #### Treating of Booleans (Conditions, Not Names) + ## Treating of Booleans (Conditions, Not Names) For the special case of booleans in a condition list we do not treat them as names. """ @@ -1271,7 +1330,7 @@ def test_bools_are_no_names(): Selective classification allows to call condition functions only when other criteria are met. That makes it possible to read e.g. from a database only when data is really required - and not always, "just in case". - pycond allows to define, that blocking operations should be run *async* within the stream, possibly giving up order. + pycond allows to define, that blocking operations should be run * async* within the stream, possibly giving up order. ### Asyncronous Filter @@ -1312,7 +1371,8 @@ def check(k, v, cfg, data, t0=[], **kw): """ def rx_async(): - def _thn(msg, data): return print('thread:', cur_thread().name, msg, data) + def _thn(msg, data): + return print('thread:', cur_thread().name, msg, data) # push_through just runs a stream of {'i': } through a given operator: Rx, rx, push_through = rx_setup() @@ -1334,7 +1394,7 @@ def _thn(msg, data): return print('thread:', cur_thread().name, msg, data) class F: """ Namespace for condition lookup functions. - You may also pass a dict (lookup_provider_dict) + You may also pass a dict(lookup_provider_dict) We provide the functions for 'odd' and 'blocking'. """