Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 12 additions & 78 deletions howso/ablation.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,6 @@

;Declare variables for internal use.
(declare (assoc
max_influence_weight_entropy_to_keep .infinity
cases (list)
prev_prediction_stats_map (assoc)
thresholds_enabled (or (size abs_threshold_map) (size delta_threshold_map) (size rel_threshold_map) )
Expand All @@ -576,21 +575,7 @@
weight_feature distribute_weight_feature
))
)

(declare (assoc
hyperparam_map
(call !GetHyperparameters (assoc
context_features features
weight_feature distribute_weight_feature
))
))
(declare (assoc
closest_k (get hyperparam_map "k")
p_parameter (get hyperparam_map "p")
dt_parameter (get hyperparam_map "dt")
feature_weights (get hyperparam_map "featureWeights")
feature_deviations (get hyperparam_map "featureDeviations")
query_feature_attributes_map (get hyperparam_map "featureDomainAttributes")
num_cases (call !GetNumTrainingCases)

;reduction will stop within batch_size of reduce_max_cases, so if the gap between
Expand All @@ -599,6 +584,13 @@
approximate_num_cases_to_keep (max (- reduce_max_cases batch_size) !autoAblationMinNumCases)
))

;nothing needed to reduce since the dataset is already small enough
(if (>= approximate_num_cases_to_keep num_cases)
(conclude
(call !Return (assoc payload output))
)
)

(if thresholds_enabled
(assign (assoc
prev_prediction_stats_map
Expand All @@ -613,75 +605,17 @@
))
)

;pair of cases and associated sorted popularities (total normalized influence of all neighbors that referenced it)
(declare (assoc
case_popularity_pair
(compute_on_contained_entities
(query_exists !internalLabelSession)
||(query_entity_cumulative_nearest_entity_weights
closest_k
features
(null) ;all cases
p_parameter
feature_weights
!queryDistanceTypeMap
query_feature_attributes_map
feature_deviations
(null)
dt_parameter
distribute_weight_feature
(rand)
(null) ;radius
!numericalPrecision
.true
)
)
))

;all the cases that were not returned in the pair above have 0 popularity (no other cases reference them)
(declare (assoc
zero_popularity_neighbors
removable_cases
(contained_entities
(query_exists !internalLabelSession)
(query_not_in_entity_list (first case_popularity_pair))
(query_select (- num_cases approximate_num_cases_to_keep) (null) (rand) )
)
))

;determine the cutoff value of the popularity at which all cases with a value less than that should be removed
;e.g., if there needs to be a quarter of cases left, this would compute the 0.75 quantile of popularity values,
;so that those bottom 75% are removed
(declare (assoc
reduction_popularity_cutoff
(quantile
(append
(last case_popularity_pair)
(range 0 1 (size zero_popularity_neighbors) 1)
)
;add one percent to account for enough cases selected to match the amount needed to be removed due to rounding
;e.g., if the quantile value was 0.75 from the example above, this bumps it up to 0.76
(+
(/ (- num_cases approximate_num_cases_to_keep) num_cases)
0.01
)
)
))
;plan to only remove cases whose popularity is less than reduction_popularity_cutoff
;i.e., only remove the non-popular cases that aren't referenced by others as much
(declare (assoc
num_removal_eligible_cases
(size (filter
(lambda (< (current_value) reduction_popularity_cutoff))
(last case_popularity_pair)
))
))
(declare (assoc
;case ids in order from highest to lowest popularity, lowest popularity at end of list
removable_cases
(append
;only keep the necessary number of lowest popularity eligible cases as well as all zero popularity ones
(tail (first case_popularity_pair) num_removal_eligible_cases)
zero_popularity_neighbors
)
;randomize the order
(assign (assoc
removable_cases (rand removable_cases (size removable_cases) .true)
))

(declare (assoc
Expand Down
Loading