diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 65735a11..9c226ace 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -563,7 +563,6 @@ ;Declare variables for internal use. (declare (assoc - max_influence_weight_entropy_to_keep .infinity cases (list) prev_prediction_stats_map (assoc) thresholds_enabled (or (size abs_threshold_map) (size delta_threshold_map) (size rel_threshold_map) ) @@ -576,21 +575,7 @@ weight_feature distribute_weight_feature )) ) - - (declare (assoc - hyperparam_map - (call !GetHyperparameters (assoc - context_features features - weight_feature distribute_weight_feature - )) - )) (declare (assoc - closest_k (get hyperparam_map "k") - p_parameter (get hyperparam_map "p") - dt_parameter (get hyperparam_map "dt") - feature_weights (get hyperparam_map "featureWeights") - feature_deviations (get hyperparam_map "featureDeviations") - query_feature_attributes_map (get hyperparam_map "featureDomainAttributes") num_cases (call !GetNumTrainingCases) ;reduction will stop within batch_size of reduce_max_cases, so if the gap between @@ -599,6 +584,13 @@ approximate_num_cases_to_keep (max (- reduce_max_cases batch_size) !autoAblationMinNumCases) )) + ;nothing needed to reduce since the dataset is already small enough + (if (>= approximate_num_cases_to_keep num_cases) + (conclude + (call !Return (assoc payload output)) + ) + ) + (if thresholds_enabled (assign (assoc prev_prediction_stats_map @@ -613,75 +605,17 @@ )) ) - ;pair of cases and associated sorted popularities (total normalized influence of all neighbors that referenced it) - (declare (assoc - case_popularity_pair - (compute_on_contained_entities - (query_exists !internalLabelSession) - ||(query_entity_cumulative_nearest_entity_weights - closest_k - features - (null) ;all cases - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - dt_parameter - distribute_weight_feature - (rand) - (null) ;radius - !numericalPrecision - .true - ) - ) - )) - - ;all the cases that were not returned in the pair above have 0 popularity (no other cases reference them) (declare (assoc - zero_popularity_neighbors + removable_cases (contained_entities (query_exists !internalLabelSession) - (query_not_in_entity_list (first case_popularity_pair)) + (query_select (- num_cases approximate_num_cases_to_keep) (null) (rand) ) ) )) - ;determine the cutoff value of the popularity at which all cases with a value less than that should be removed - ;e.g., if there needs to be a quarter of cases left, this would compute the 0.75 quantile of popularity values, - ;so that those bottom 75% are removed - (declare (assoc - reduction_popularity_cutoff - (quantile - (append - (last case_popularity_pair) - (range 0 1 (size zero_popularity_neighbors) 1) - ) - ;add one percent to account for enough cases selected to match the amount needed to be removed due to rounding - ;e.g., if the quantile value was 0.75 from the example above, this bumps it up to 0.76 - (+ - (/ (- num_cases approximate_num_cases_to_keep) num_cases) - 0.01 - ) - ) - )) - ;plan to only remove cases whose popularity is less than reduction_popularity_cutoff - ;i.e., only remove the non-popular cases that aren't referenced by others as much - (declare (assoc - num_removal_eligible_cases - (size (filter - (lambda (< (current_value) reduction_popularity_cutoff)) - (last case_popularity_pair) - )) - )) - (declare (assoc - ;case ids in order from highest to lowest popularity, lowest popularity at end of list - removable_cases - (append - ;only keep the necessary number of lowest popularity eligible cases as well as all zero popularity ones - (tail (first case_popularity_pair) num_removal_eligible_cases) - zero_popularity_neighbors - ) + ;randomize the order + (assign (assoc + removable_cases (rand removable_cases (size removable_cases) .true) )) (declare (assoc