diff --git a/06_dataframe/2_filters.py b/06_dataframe/2_filters.py
index 9a4fc4d..0c36798 100644
--- a/06_dataframe/2_filters.py
+++ b/06_dataframe/2_filters.py
@@ -1,5 +1,6 @@
 from datetime import datetime
 
+from chalk import online
 from chalk.features import DataFrame, features
 
 
@@ -18,30 +19,53 @@ class User:
     id: int
     txns: DataFrame[Transaction]
 
+    # Computed Fields
 
-# You can filter down the transactions by any of the
-# properties on the transaction
-credits = User.txns[Transaction.amount < 0]
+    num_valid_txns: int
+    num_rideshare_txns: int
 
-# Or works much like `and`:
-rideshare_income = User.txns[
-    Transaction.amount < 0
-    and (Transaction.merchant in ("uber", "lyft") or "uberpmts" == Transaction.memo)
-]
 
-# You can also check for set or list membership with `in`:
-rideshare_txns = User.txns[Transaction.merchant in ("uber", "lyft")]
+# You can filter down the transactions by any of the properties on the transaction
 
-# Filters separated by commas function as `and` filters:
-rideshare_credits = User.txns[
-    Transaction.amount < 0, Transaction.merchant in ("uber", "lyft")
-]
 
-# Equivalently, you can use the keyword `and` instead of separating by commas
-rideshare_credits = User.txns[
-    Transaction.amount < 0 and Transaction.merchant in ("uber", "lyft")
-]
+@online(tags=["v1"])
+def get_count_rideshare_transactions_v1(
+    txns: User.txns[Transaction.merchant in ("uber", "lyft")]
+) -> User.num_rideshare_txns:
+    # You can also check for set or list membership with `in`:
+    return len(txns)
 
 
-# Filters can also check for None the same way you check for None in Python
-valid_txns = User.txns[Transaction.canceled_at is not None]
+@online(tags=["v2"])
+def get_count_rideshare_transactions_v2(
+    txns: User.txns[Transaction.amount < 0, Transaction.merchant in ("uber", "lyft")]
+) -> User.num_rideshare_txns:
+    # Filters separated by commas function as `and` filters:
+    return len(txns)
+
+
+@online(tags=["v3"])
+def get_count_rideshare_transactions_v3(
+    txns: User.txns[Transaction.amount < 0 and Transaction.merchant in ("uber", "lyft")]
+) -> User.num_rideshare_txns:
+    # Equivalently, you can use the keyword `and` instead of separating by commas to apply multiple filters.
+    return len(txns)
+
+
+@online(tags=["v4"])
+def get_count_rideshare_transactions_v4(
+    txns: User.txns[
+        Transaction.amount < 0
+        and (Transaction.merchant in ("uber", "lyft") or "uberpmts" == Transaction.memo)
+    ]
+) -> User.num_rideshare_txns:
+    # "or" filters can also be applied through the `or` keyword:
+    return len(txns)
+
+
+@online
+def get_count_valid_transactions(
+    valid_transactions: User.txns[Transaction.canceled_at is not None],
+) -> User.num_valid_txns:
+    # Filters can also check for None the same way you check for None in Python
+    return len(valid_transactions)
diff --git a/06_dataframe/3_projections.py b/06_dataframe/3_projections.py
index af1ef6a..8e5c414 100644
--- a/06_dataframe/3_projections.py
+++ b/06_dataframe/3_projections.py
@@ -14,8 +14,17 @@ class Transaction:
 class User:
     id: int
     txns: DataFrame[Transaction]
+    txn_total: int
 
 
-# You can filter down the transactions by any of the
-# properties on the transaction
-credits = User.txns[Transaction.amount]
+# NOTE(review): no `@online` decorator here, unlike the resolvers in
+# 2_filters.py and 4_filters_and_projections.py -- confirm whether intended.
+def get_transaction_total(
+    txns: User.txns[Transaction.amount]
+) -> User.txn_total:
+    """Return the sum over the `amount` column of the user's transactions.
+
+    We do not need the other transaction fields, so we project the DataFrame down
+    to `amount`, filtering out all other columns and making the sum more efficient.
+    """
+    return txns.sum()
diff --git a/06_dataframe/4_filters_and_projections.py b/06_dataframe/4_filters_and_projections.py
index dee296f..c33a583 100644
--- a/06_dataframe/4_filters_and_projections.py
+++ b/06_dataframe/4_filters_and_projections.py
@@ -1,4 +1,5 @@
+from chalk import online
 from chalk.features import DataFrame, _, features
 
 
 @features
@@ -14,11 +15,25 @@ class Transaction:
 class User:
     id: int
     txns: DataFrame[Transaction]
+    txn_total: int
 
 
-# You can filter down the transactions by any of the
-# properties on the transaction
-credits = User.txns[Transaction.amount < 0]
+# Filters and projections can be combined
+@online(tags=["v1"])
+def get_transaction_total_v1(
+    txns: User.txns[
+        Transaction.amount < 0,  # filter
+        Transaction.amount  # projection
+    ]
+) -> User.txn_total:
+    return txns.sum()
 
-# You can also use the '_' as an alias for the current namespace
-credits = User.txns[_.amount < 0]
+
+@online(tags=["v2"])
+def get_transaction_total_v2(
+    txns: User.txns[
+        _.amount < 0,  # "_" is an alias for the current namespace
+        _.amount
+    ]
+) -> User.txn_total:
+    return txns.sum()
diff --git a/06_dataframe/5_aggregations.py b/06_dataframe/5_aggregations.py
index a84c78e..61d4473 100644
--- a/06_dataframe/5_aggregations.py
+++ b/06_dataframe/5_aggregations.py
@@ -1,4 +1,5 @@
+from chalk import online
 from chalk.features import DataFrame, features
 
 
 @features
@@ -20,5 +21,7 @@ class User:
 # You can filter down the transactions by any of the
 # properties on the transaction
 @online
-def get_num_credits(credits: User.txns[Transaction.amount < 0]) -> User.num_credits:
+def get_num_credits(
+    credits: User.txns[Transaction.amount < 0]
+) -> User.num_credits:
     return len(credits)