From 51049cc708678024cb690617b5bd20ad4c650c3a Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Tue, 13 Jan 2026 16:12:31 -0500 Subject: [PATCH 01/39] grab random cases to remove --- howso/ablation.amlg | 67 ++------------------------------------------- 1 file changed, 2 insertions(+), 65 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index e8bdf90e..8c0ab2fe 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -613,74 +613,11 @@ )) ) - ;pair of cases and associated sorted popularities (total normalized influence of all neighbors that referenced it) (declare (assoc - case_popularity_pair - (compute_on_contained_entities - (query_exists !internalLabelSession) - ||(query_entity_cumulative_nearest_entity_weights - closest_k - features - (null) ;all cases - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - dt_parameter - distribute_weight_feature - (rand) - (null) ;radius - !numericalPrecision - .true - ) - ) - )) - - ;all the cases that were not returned in the pair above have 0 popularity (no other cases reference them) - (declare (assoc - zero_popularity_neighbors + removable_cases (contained_entities (query_exists !internalLabelSession) - (query_not_in_entity_list (first case_popularity_pair)) - ) - )) - - ;determine the cutoff value of the popularity at which all cases with a value less than that should be removed - ;e.g., if there needs to be a quarter of cases left, this would compute the 0.75 quantile of popularity values, - ;so that those bottom 75% are removed - (declare (assoc - reduction_popularity_cutoff - (quantile - (append - (last case_popularity_pair) - (range 0 1 (size zero_popularity_neighbors) 1) - ) - ;add one percent to account for enough cases selected to match the amount needed to be removed due to rounding - ;e.g., if the quantile value was 0.75 from the example above, this bumps it up to 0.76 - (+ - (/ (- num_cases approximate_num_cases_to_keep) num_cases) - 0.01 - ) - ) - )) - ;plan to only remove cases whose popularity is less than reduction_popularity_cutoff - ;i.e., only remove the non-popular cases that aren't referenced by others as much - (declare (assoc - num_removal_eligible_cases - (size (filter - (lambda (< (current_value) reduction_popularity_cutoff)) - (last case_popularity_pair) - )) - )) - (declare (assoc - ;case ids in order from highest to lowest popularity, lowest popularity at end of list - removable_cases - (append - ;only keep the necessary number of lowest popularity eligible cases as well as all zero popularity ones - (tail (first case_popularity_pair) num_removal_eligible_cases) - zero_popularity_neighbors + (query_select (- num_cases approximate_num_cases_to_keep) (null) (rand) ) ) )) From 495f44a8602a7f964fa2f37eac60c5a69cb5549e Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Wed, 14 Jan 2026 10:24:14 -0500 Subject: [PATCH 02/39] remove some unused stuff --- howso/ablation.amlg | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 8c0ab2fe..19a2bb30 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -576,21 +576,7 @@ weight_feature distribute_weight_feature )) ) - - (declare (assoc - hyperparam_map - (call !GetHyperparameters (assoc - context_features features - weight_feature distribute_weight_feature - )) - )) (declare (assoc - closest_k (get hyperparam_map "k") - p_parameter (get hyperparam_map "p") - dt_parameter (get hyperparam_map "dt") - feature_weights (get hyperparam_map "featureWeights") - feature_deviations (get hyperparam_map "featureDeviations") - query_feature_attributes_map (get hyperparam_map "featureDomainAttributes") num_cases (call !GetNumTrainingCases) ;reduction will stop within batch_size of reduce_max_cases, so if the gap between From f815bf13a5229cfcb96b2cbdc293a5e564a37ba8 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Wed, 14 Jan 2026 10:47:09 -0500 Subject: [PATCH 03/39] one last unused var --- howso/ablation.amlg | 1 - 1 file changed, 1 deletion(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 19a2bb30..5d98a1b2 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -563,7 +563,6 @@ ;Declare variables for internal use. (declare (assoc - max_influence_weight_entropy_to_keep .infinity cases (list) prev_prediction_stats_map (assoc) thresholds_enabled (or (size abs_threshold_map) (size delta_threshold_map) (size rel_threshold_map) ) From 5fbefc97738ceca1bd723adb3f1f42bcf979dd29 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Wed, 14 Jan 2026 13:08:44 -0500 Subject: [PATCH 04/39] randomize test --- howso/ablation.amlg | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 5d98a1b2..6d074d85 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -584,6 +584,13 @@ approximate_num_cases_to_keep (max (- reduce_max_cases batch_size) !autoAblationMinNumCases) )) + ;nothing needed to reduce since the dataset is already small enough + (if (>= approximate_num_cases_to_keep num_cases) + (conclude + (call !Return (assoc payload output)) + ) + ) + (if thresholds_enabled (assign (assoc prev_prediction_stats_map @@ -606,6 +613,11 @@ ) )) + ;randomize the order + (assign (assoc + removable_cases (rand removable_cases (size removable_cases) .true) + )) + (declare (assoc ;list will be sorted from highest to lowest, thus cases removed from the end of the list end_index (- (size removable_cases) 1) From 457700dfc5f81c96dfd04f99331eb9df9dd603f2 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Wed, 14 Jan 2026 14:22:21 -0500 Subject: [PATCH 05/39] make it one pass --- howso/ablation.amlg | 133 ++++++++++++++------------------------------ 1 file changed, 42 insertions(+), 91 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 6d074d85..ad8f0a38 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -615,116 +615,67 @@ ;randomize the order (assign (assoc - removable_cases (rand removable_cases (size removable_cases) .true) + cases (rand removable_cases (size removable_cases) .true) )) - (declare (assoc - ;list will be sorted from highest to lowest, thus cases removed from the end of the list - end_index (- (size removable_cases) 1) - random_cases .false - num_removed_this_batch 0 - )) - - ;Begin looping on data removal. The ultimate end condition is if the dataset gets too small to continue removing cases. - (while (< !autoAblationMinNumCases (call !GetNumTrainingCases)) - (assign (assoc - num_removed_this_batch (min batch_size (- (call !GetNumTrainingCases) !autoAblationMinNumCases)) - )) + (if !tsTimeFeature + ;do not remove first (.series_index == 0) or last (.reverse_series_index == 0) cases for any series (assign (assoc cases - (if (>= end_index 0) - ;grab the cases from the end, with the smallest values - (unzip - removable_cases - (range - (max 0 (- end_index num_removed_this_batch -1)) - end_index - ) - ) - - ;else select random cases - (contained_entities - (query_exists distribute_weight_feature) - (query_select num_removed_this_batch (null) (rand) ) - ) + (contained_entities + (query_in_entity_list cases) + (query_not_equals ".reverse_series_index" 0) + (query_not_equals ".series_index" 0) ) )) + ) - (if (>= end_index 0) - ;update end index to account for the cases about to be removed - (assign (assoc end_index (- end_index (size cases)) )) - - ;else no more removable cases left, remove random cases - (assign (assoc random_cases .true)) - ) - - (if !tsTimeFeature - ;do not remove first (.series_index == 0) or last (.reverse_series_index == 0) cases for any series - (assign (assoc - cases - (contained_entities - (query_in_entity_list cases) - (query_not_equals ".reverse_series_index" 0) - (query_not_equals ".series_index" 0) - ) + (if (size cases) + (seq + (call !RemoveCases (assoc + cases cases + distribute_weight_feature distribute_weight_feature )) - ) - (if (size cases) - (seq - (call !RemoveCases (assoc - cases cases - distribute_weight_feature distribute_weight_feature - )) - - (if thresholds_enabled - (let - (assoc - batch_threshold_info (null) - new_prediction_stats_map - (get - (call !CalculateFeatureResiduals (assoc - weight_feature distribute_weight_feature - use_case_weights .true - compute_all_statistics .true - )) - "prediction_stats" - ) - ) - (assign (assoc - batch_threshold_info - (call !CheckThresholds (assoc - abs_threshold_map abs_threshold_map - delta_threshold_map delta_threshold_map - rel_threshold_map rel_threshold_map - prev_prediction_stats_map prev_prediction_stats_map - new_prediction_stats_map new_prediction_stats_map + (if thresholds_enabled + (let + (assoc + batch_threshold_info (null) + new_prediction_stats_map + (get + (call !CalculateFeatureResiduals (assoc + weight_feature distribute_weight_feature + use_case_weights .true + compute_all_statistics .true )) - )) - (if (apply "or" (values batch_threshold_info)) - (seq - (accum "output" ["threshold_info"] batch_threshold_info) - (conclude) + "prediction_stats" ) - (assign (assoc - prev_prediction_stats_map new_prediction_stats_map + ) + (assign (assoc + batch_threshold_info + (call !CheckThresholds (assoc + abs_threshold_map abs_threshold_map + delta_threshold_map delta_threshold_map + rel_threshold_map rel_threshold_map + prev_prediction_stats_map prev_prediction_stats_map + new_prediction_stats_map new_prediction_stats_map )) + )) + (if (apply "or" (values batch_threshold_info)) + (seq + (accum "output" ["threshold_info"] batch_threshold_info) + (conclude) ) + (assign (assoc + prev_prediction_stats_map new_prediction_stats_map + )) ) ) ) - - ;else couldn't select any from random cases, stop - (and random_cases (< end_index 0)) - (conclude) - ) - - ;enough cases have been removed, can stop removing - (if (<= (call !GetNumTrainingCases) reduce_max_cases) - (conclude) ) ) + ;if the number of cases has been reduced by 'e' or more, auto analyze if needed (if (< (call !GetNumTrainingCases) (/ num_cases 2.718281828459)) (call !AutoAnalyzeIfNeeded (assoc From f8877701979169b359794e9b61324d3c12e7831c Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Wed, 14 Jan 2026 17:19:48 -0500 Subject: [PATCH 06/39] crappy python translation --- howso/ablation.amlg | 168 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 162 insertions(+), 6 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index ad8f0a38..5b7a69d9 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -605,17 +605,173 @@ )) ) + ;START facility something algo + ;approximate_num_cases_to_keep is the amount of cases to select + + (declare (assoc + hyperparam_map (call !GetHyperparameters (assoc weight_feature distribute_weight_feature)) + )) + + (declare (assoc + k_parameter (get hyperparam_map "k") + p_parameter (get hyperparam_map "p") + feature_weights (get hyperparam_map "featureWeights") + dt_parameter (get hyperparam_map "dt") + feature_deviations (get hyperparam_map "featureDeviations") + query_feature_attributes_map (get hyperparam_map "featureDomainAttributes") + )) + + (declare (assoc + all_case_ids (call !AllCases) + ) + + (declare (assoc + best_sim + ;best similarity of all cases to the chosen set of cases to keep + (map 0 all_case_ids) + )) + + (declare (assoc + similarity_matrix + ||(map + (lambda + (unzip + (compute_on_contained_entities + (query_in_entity_list all_case_ids) + (query_within_generalized_distance + .infinity ;distance + features + (retrieve_from_entity (current_value) features) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + dt_parameter + (if valid_weight_feature weight_feature (null)) + "fixed rand seed" + (null) ;radius + !numericalPrecision + ) + ) + all_case_ids + ) + ) + all_case_ids + ) + )) + + (declare (assoc + initial_gain + (map + (lambda (apply "+" (current_value)) ) + similarity_matrix + ) + done .false + )) + + (declare (assoc + heap (zip all_case_ids initial_gain) + )) + (declare (assoc - removable_cases - (contained_entities - (query_exists !internalLabelSession) - (query_select (- num_cases approximate_num_cases_to_keep) (null) (rand) ) + cases_to_keep + (while (not done) + (let + (assoc + current_cases_to_keep + (if (= (current_index 1) 0) + (list) + + (previous_result 1) + ) + ) + + ;neg_gain, idx, stale = heapq.heappop(heap) + (declare (assoc + most_similar_case (first (index_max heap)) + )) + (declare (assoc + most_similar_case_index + (first (filter + (lambda (= (get all_case_ids (current_value)) most_similar_case ) + (indices all_case_ids) + )) + )) + (declare (assoc + gain (get heap most_similar_case) + )) + (assign (assoc + heap (remove heap [most_similar_case]) + )) + + (declare (assoc + update_best_sim + (map + (lambda (apply "max" (current_value))) + best_sim + ;similarity[:, idx] + (map + (lambda (get (current_value) most_similar_case_index)) + similarity_matrix + ) + ) + )) + + ;true_gain = (np.maximum(best_sim, similarity[:, idx])).sum() - best_sim.sum() + (declare (assoc + true_gain + (- + (apply "+" updated_best_sim) + (apply "+" best_sim) + ) + )) + + (if (!= gain true_gain) + (seq + ;update heap with true gain + ;"heapq.heappush(heap, (-true_gain, idx, best_sim[idx]))" + (assign (assoc + heap + (append + heap + (associate most_similar_case true_gain) + ) + )) + + ;keep current set + current_cases_to_keep + ) + + (seq + ; # update best_sim for all points + ; best_sim = np.maximum(best_sim, similarity[:, idx]) + (assign (assoc + best_sim updated_best_sim + )) + + (if (> (size current_cases_to_keep_set) !autoAblationMinNumCases) + (assign (assoc done .true )) + ) + + ; # otherwise the gain is current → accept this point + ; selected.append(idx) + (append current_cases_to_keep most_similar_case) + ) + ) + ) ) )) - ;randomize the order + + + + ;END facility something algo + (assign (assoc - cases (rand removable_cases (size removable_cases) .true) + ;the list of case ids to be removed + cases (null) )) (if !tsTimeFeature From 67ee26d93c4c893217136d957a9238b9af99e55e Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Thu, 15 Jan 2026 11:58:12 -0500 Subject: [PATCH 07/39] fixed --- howso/ablation.amlg | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 5b7a69d9..636ff9f3 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -695,7 +695,7 @@ (declare (assoc most_similar_case_index (first (filter - (lambda (= (get all_case_ids (current_value)) most_similar_case ) + (lambda (= (get all_case_ids (current_value)) most_similar_case ) ) (indices all_case_ids) )) )) @@ -707,7 +707,7 @@ )) (declare (assoc - update_best_sim + updated_best_sim (map (lambda (apply "max" (current_value))) best_sim @@ -751,7 +751,7 @@ best_sim updated_best_sim )) - (if (> (size current_cases_to_keep_set) !autoAblationMinNumCases) + (if (> (size current_cases_to_keep) !autoAblationMinNumCases) (assign (assoc done .true )) ) @@ -771,7 +771,11 @@ (assign (assoc ;the list of case ids to be removed - cases (null) + cases + (contained_entities + (query_exists !internalLabelSession) + (query_not_in_entity_list cases_to_keep) + ) )) (if !tsTimeFeature From 7448590cfe04e237e66c4bebce58bdf679a1c9cc Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Thu, 15 Jan 2026 12:05:47 -0500 Subject: [PATCH 08/39] msimatch paren --- howso/ablation.amlg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 636ff9f3..4970cc1a 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -623,7 +623,7 @@ (declare (assoc all_case_ids (call !AllCases) - ) + )) (declare (assoc best_sim From e3b1b93945b5c3e91fb535d3d5b596cc5311a8c5 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Thu, 15 Jan 2026 16:54:59 -0500 Subject: [PATCH 09/39] impl --- howso/ablation.amlg | 227 ++++++++++++++++++++------------------------ 1 file changed, 104 insertions(+), 123 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 4970cc1a..cfcbb72b 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -623,148 +623,129 @@ (declare (assoc all_case_ids (call !AllCases) - )) - - (declare (assoc - best_sim - ;best similarity of all cases to the chosen set of cases to keep - (map 0 all_case_ids) - )) - - (declare (assoc - similarity_matrix - ||(map - (lambda - (unzip - (compute_on_contained_entities - (query_in_entity_list all_case_ids) - (query_within_generalized_distance - .infinity ;distance - features - (retrieve_from_entity (current_value) features) - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - dt_parameter - (if valid_weight_feature weight_feature (null)) - "fixed rand seed" - (null) ;radius - !numericalPrecision - ) - ) - all_case_ids - ) - ) - all_case_ids - ) - )) - - (declare (assoc - initial_gain - (map - (lambda (apply "+" (current_value)) ) - similarity_matrix - ) done .false )) - (declare (assoc - heap (zip all_case_ids initial_gain) - )) + (map + (lambda + (accum_entity_roots (current_value) (zip_labels + ["keeping"] [.false] + )) + ) + all_case_ids + ) - (declare (assoc - cases_to_keep - (while (not done) - (let - (assoc - current_cases_to_keep - (if (= (current_index 1) 0) - (list) - (previous_result 1) - ) + #!ReduceComputeDCs + (let + (assoc + case_to_dc_map + (compute_on_contained_entities + (query_equals "keeping" .false) + ||(query_entity_distance_contributions + k_parameter + features + (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false)) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) + distribute_weight_feature + ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") + "fixed rand seed" + (null) ;radius + !numericalPrecision ) + ) + ) - ;neg_gain, idx, stale = heapq.heappop(heap) - (declare (assoc - most_similar_case (first (index_max heap)) - )) - (declare (assoc - most_similar_case_index - (first (filter - (lambda (= (get all_case_ids (current_value)) most_similar_case ) ) - (indices all_case_ids) - )) - )) - (declare (assoc - gain (get heap most_similar_case) - )) - (assign (assoc - heap (remove heap [most_similar_case]) - )) + (call !StoreCaseValues (assoc + case_values_map case_to_dc_map + label_name ".reduce_dc_val" + )) + ) - (declare (assoc - updated_best_sim - (map - (lambda (apply "max" (current_value))) - best_sim - ;similarity[:, idx] - (map - (lambda (get (current_value) most_similar_case_index)) - similarity_matrix - ) - ) - )) + (while (not done) + (let + (assoc + case_to_add + (if (= (current_index 1) 0) + ;on first iteration, just take lowest DC case + (first (contained_entities + (query_exists !internalLabelSession) + (query_equals "keeping" .false) + (query_min ".reduce_dc_val" 1 .true) + )) - ;true_gain = (np.maximum(best_sim, similarity[:, idx])).sum() - best_sim.sum() - (declare (assoc - true_gain - (- - (apply "+" updated_best_sim) - (apply "+" best_sim) + ;otherwise need case with low DC that is far from its most similar case in current_cases_to_keep + (let + (assoc + lowest_dc_cases + (contained_entities + (query_exists !internalLabelSession) + (query_equals "keeping" .false) + (query_min ".reduce_dc_val" 5 .true) + ) ) - )) - (if (!= gain true_gain) - (seq - ;update heap with true gain - ;"heapq.heappush(heap, (-true_gain, idx, best_sim[idx]))" - (assign (assoc - heap - (append - heap - (associate most_similar_case true_gain) + ;for each low-DC case, get its distance to its closest case that we ARE KEEPING + (declare (assoc + lowest_dc_closest_distance_to_selected_map + (map + (lambda + (first (compute_on_contained_entities + (query_equals "keeping" .true) + (query_distance_contributions + 1 + features + [(retrieve_from_entity (current_index 1) features)] + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) + distribute_weight_feature + ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") + "fixed rand seed" + (null) ;radius + !numericalPrecision + ) + )) + ) + (zip lowest_dc_cases) ) )) - ;keep current set - current_cases_to_keep - ) - (seq - ; # update best_sim for all points - ; best_sim = np.maximum(best_sim, similarity[:, idx]) - (assign (assoc - best_sim updated_best_sim - )) - - (if (> (size current_cases_to_keep) !autoAblationMinNumCases) - (assign (assoc done .true )) - ) - - ; # otherwise the gain is current → accept this point - ; selected.append(idx) - (append current_cases_to_keep most_similar_case) + (first (index_max lowest_dc_closest_distance_to_selected_map)) ) ) - ) ) - )) + (assign_to_entities case_to_add (assoc + keeping .true + )) + + (if (>= + (size (contained_entities + (query_exists !internalLabelSession) + (query_equals "keeping" .true) + )) + !autoAblationMinNumCases + ) + (assign (assoc done .true)) + (if (= (mod (current_index) 50) 0) + (call !ReduceComputeDCs) + ) + ) + ) + ) ;END facility something algo @@ -774,7 +755,7 @@ cases (contained_entities (query_exists !internalLabelSession) - (query_not_in_entity_list cases_to_keep) + (query_equals "keeping" .false) ) )) From ca752b8cab878506754be9372f105880cd1b9565 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Fri, 16 Jan 2026 13:02:13 -0500 Subject: [PATCH 10/39] udpaters --- howso/ablation.amlg | 107 +++++++++++++++++++++++++++++++------------- 1 file changed, 75 insertions(+), 32 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index cfcbb72b..68a13680 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -626,6 +626,7 @@ done .false )) + ;mark each case as not being kept at first (map (lambda (accum_entity_roots (current_value) (zip_labels @@ -641,7 +642,7 @@ (assoc case_to_dc_map (compute_on_contained_entities - (query_equals "keeping" .false) + (query_equals "keeping" .false) ;maybe not appropriate? TODO ||(query_entity_distance_contributions k_parameter features @@ -668,68 +669,110 @@ )) ) + ;experimental params + (declare (assoc + ;the amount of low DC cases to consider for keeping + lowest_dc_trunc_n 15 + + ;number of cases to select for keeping per iteration + cases_to_keep_per_iter 5 + + ;how many iterations between each DC recomputation + dc_recompute_cycles 10 + )) + (while (not done) (let (assoc - case_to_add + cases_to_add (if (= (current_index 1) 0) ;on first iteration, just take lowest DC case - (first (contained_entities + (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false) (query_min ".reduce_dc_val" 1 .true) - )) + ) - ;otherwise need case with low DC that is far from its most similar case in current_cases_to_keep + ;otherwise need cases with low DC that is far from its most similar case in current_cases_to_keep (let (assoc lowest_dc_cases (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false) - (query_min ".reduce_dc_val" 5 .true) + (query_min ".reduce_dc_val" lowest_dc_trunc_n .true) ) ) ;for each low-DC case, get its distance to its closest case that we ARE KEEPING (declare (assoc lowest_dc_closest_distance_to_selected_map + (compute_on_contained_entities + (query_equals "keeping" .true) + ||(query_entity_distance_contributions + 1 ;k + features + lowest_dc_cases + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) + distribute_weight_feature + ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") + "fixed rand seed" + (null) ;radius + !numericalPrecision + ) + ) + )) + + (declare (assoc + low_dc_case_scores (map (lambda - (first (compute_on_contained_entities - (query_equals "keeping" .true) - (query_distance_contributions - 1 - features - [(retrieve_from_entity (current_index 1) features)] - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) - distribute_weight_feature - ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") - "fixed rand seed" - (null) ;radius - !numericalPrecision - ) - )) + ;divide distance to closest selected case by distance contribution + ;more distance to closest selected case = good + ;lower distance contribution = good + (/ + (current_value) + (retrieve_from_entity (current_index) ".reduce_dc_val") + ) + ) + lowest_dc_closest_distance_to_selected_map + ) + )) + + (declare (assoc + ;sorting low dc cases by *decreasing* "score" + sorted_lowest_dc_cases + (sort + (lambda + (- + (get low_dc_case_scores (current_value 1)) + (get low_dc_case_scores (current_value)) + ) ) - (zip lowest_dc_cases) + lowest_dc_cases ) )) - (first (index_max lowest_dc_closest_distance_to_selected_map)) + (trunc sorted_lowest_dc_cases cases_to_keep_per_iter) ) ) ) - (assign_to_entities case_to_add (assoc - keeping .true - )) + (map + (lambda + (assign_to_entities (current_value) (assoc + keeping .true + )) + ) + cases_to_add + ) (if (>= (size (contained_entities @@ -740,7 +783,7 @@ ) (assign (assoc done .true)) - (if (= (mod (current_index) 50) 0) + (if (and (current_index) (= (mod (current_index) dc_recompute_cycles) 0) ) (call !ReduceComputeDCs) ) ) From 3a539a0f65835e83409cacdf97611945f55d0757 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Fri, 16 Jan 2026 13:46:29 -0500 Subject: [PATCH 11/39] now testing this version --- howso/ablation.amlg | 116 +++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 55 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 68a13680..a7ca465e 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -637,14 +637,13 @@ ) - #!ReduceComputeDCs + #!ReduceComputeNeighborSurprisals (let (assoc - case_to_dc_map + case_neighbor_surprisal_map (compute_on_contained_entities - (query_equals "keeping" .false) ;maybe not appropriate? TODO ||(query_entity_distance_contributions - k_parameter + 1 features (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false)) p_parameter @@ -664,21 +663,19 @@ ) (call !StoreCaseValues (assoc - case_values_map case_to_dc_map - label_name ".reduce_dc_val" + case_values_map case_neighbor_surprisal_map + label_name ".neighbor_surprisal" )) ) ;experimental params (declare (assoc - ;the amount of low DC cases to consider for keeping - lowest_dc_trunc_n 15 + ;the amount of lowest smallest-surprisal cases to consider for keeping + ;CAN BE NULLED + lowest_ns_cases_trunc_n (null) ;number of cases to select for keeping per iteration - cases_to_keep_per_iter 5 - - ;how many iterations between each DC recomputation - dc_recompute_cycles 10 + cases_to_keep_per_iter 1 )) (while (not done) @@ -690,77 +687,88 @@ (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false) - (query_min ".reduce_dc_val" 1 .true) + (query_min ".neighbor_surprisal" 1 .true) ) - ;otherwise need cases with low DC that is far from its most similar case in current_cases_to_keep + ;otherwise need cases with low neighbor surprisal (ns) that is far from its most similar case in current_cases_to_keep (let (assoc - lowest_dc_cases + lowest_ns_cases (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false) - (query_min ".reduce_dc_val" lowest_dc_trunc_n .true) + (if lowest_ns_cases_trunc_n + (query_min ".neighbor_surprisal" lowest_ns_cases_trunc_n .true) + ) ) ) - ;for each low-DC case, get its distance to its closest case that we ARE KEEPING + ;for each low-ns case, get its core-set surprisal (css) (declare (assoc - lowest_dc_closest_distance_to_selected_map - (compute_on_contained_entities - (query_equals "keeping" .true) - ||(query_entity_distance_contributions - 1 ;k - features - lowest_dc_cases - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) - distribute_weight_feature - ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") - "fixed rand seed" - (null) ;radius - !numericalPrecision + lowest_ns_css_map + (map + (lambda + ;find the max distance between each low-ns-case and any of the selected cases + (apply "max" (values + (compute_on_contained_entities + (query_equals "keeping" .true) + (query_within_generalized_distance + .infinity ;distance + features + (retrieve_from_entity (current_index) features) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) + distribute_weight_feature + "fixed rand seed" + (null) ;radius + !numericalPrecision + ) + ) + )) ) + (zip lowest_ns_cases) ) )) (declare (assoc - low_dc_case_scores + low_ns_case_scores (map (lambda - ;divide distance to closest selected case by distance contribution - ;more distance to closest selected case = good - ;lower distance contribution = good + ;divide neighbor surprisal by coreset surprisal + + ;I think the LLM desc was wrong, so I flipped it to be coreset surprisal / neighbor surprisal (/ (current_value) - (retrieve_from_entity (current_index) ".reduce_dc_val") + (retrieve_from_entity (current_index) ".neighbor_surprisal") ) ) - lowest_dc_closest_distance_to_selected_map + lowest_ns_css_map ) )) - (declare (assoc - ;sorting low dc cases by *decreasing* "score" - sorted_lowest_dc_cases + + ;sorting low dc cases by *decreasing* "score" and return the right amount + (trunc + (if (= 1 cases_to_keep_per_iter) + (index_max low_ns_case_scores) + (sort (lambda (- - (get low_dc_case_scores (current_value 1)) - (get low_dc_case_scores (current_value)) + (get low_ns_case_scores (current_value 1)) + (get low_ns_case_scores (current_value)) ) ) - lowest_dc_cases + lowest_ns_cases ) - )) - - - (trunc sorted_lowest_dc_cases cases_to_keep_per_iter) + ) + cases_to_keep_per_iter + ) ) ) ) @@ -783,9 +791,7 @@ ) (assign (assoc done .true)) - (if (and (current_index) (= (mod (current_index) dc_recompute_cycles) 0) ) - (call !ReduceComputeDCs) - ) + ; (call !ReduceComputeSmallestSurprisals) ) ) ) From 022331a246a06c43a227700b2866696fe7f39349 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Fri, 16 Jan 2026 13:49:38 -0500 Subject: [PATCH 12/39] nitpicking --- howso/ablation.amlg | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index a7ca465e..bcad50d4 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -645,7 +645,7 @@ ||(query_entity_distance_contributions 1 features - (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false)) + all_case_ids p_parameter feature_weights !queryDistanceTypeMap @@ -670,7 +670,7 @@ ;experimental params (declare (assoc - ;the amount of lowest smallest-surprisal cases to consider for keeping + ;the amount of lowest neighbor-surprisal cases to consider for keeping ;CAN BE NULLED lowest_ns_cases_trunc_n (null) @@ -773,6 +773,7 @@ ) ) + ;mark new cases to keep (map (lambda (assign_to_entities (current_value) (assoc From f967c97eb59c2fcf919a01e07a01a201a8af607f Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Fri, 16 Jan 2026 15:01:50 -0500 Subject: [PATCH 13/39] optimization --- howso/ablation.amlg | 84 +++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index bcad50d4..8e807b02 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -668,6 +668,11 @@ )) ) + (declare (assoc + ;map of case to its core-set surprisal (the max surprisal to any case in the coreset for all cases) + case_to_css_map (map (lambda (+ 0)) (zip all_case_ids)) + )) + ;experimental params (declare (assoc ;the amount of lowest neighbor-surprisal cases to consider for keeping @@ -703,38 +708,6 @@ ) ) - ;for each low-ns case, get its core-set surprisal (css) - (declare (assoc - lowest_ns_css_map - (map - (lambda - ;find the max distance between each low-ns-case and any of the selected cases - (apply "max" (values - (compute_on_contained_entities - (query_equals "keeping" .true) - (query_within_generalized_distance - .infinity ;distance - features - (retrieve_from_entity (current_index) features) - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) - distribute_weight_feature - "fixed rand seed" - (null) ;radius - !numericalPrecision - ) - ) - )) - ) - (zip lowest_ns_cases) - ) - )) - (declare (assoc low_ns_case_scores (map @@ -743,11 +716,11 @@ ;I think the LLM desc was wrong, so I flipped it to be coreset surprisal / neighbor surprisal (/ - (current_value) + (get case_to_css_map (current_index)) (retrieve_from_entity (current_index) ".neighbor_surprisal") ) ) - lowest_ns_css_map + (zip lowest_ns_cases) ) )) @@ -783,6 +756,49 @@ cases_to_add ) + (declare (assoc + new_case_css_map + (map + (lambda + ;get their max surprisal to any of the cases_to_add + (apply "max" (values + (compute_on_contained_entities + (query_in_entity_list cases_to_add) + (query_within_generalized_distance + .infinity ;distance + features + (retrieve_from_entity (current_index) features) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) + distribute_weight_feature + "fixed rand seed" + (null) ;radius + !numericalPrecision + ) + ) + )) + ) + ;all non-coreset cases + (zip (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false))) + ) + )) + + (assign (assoc + case_to_css_map + ;take max of new max css and old max css + (map + (lambda + (max (current_value) (get case_to_css_map (current_index))) + ) + new_case_css_map + ) + )) + (if (>= (size (contained_entities (query_exists !internalLabelSession) From e5769dd3139d33a69fb4125536b21a2b1e3294c8 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Fri, 16 Jan 2026 15:46:46 -0500 Subject: [PATCH 14/39] blur weights, 2 passes --- howso/ablation.amlg | 41 ++++++++++++++++++ howso/update_cases.amlg | 96 +++++++++++++++++++++++++++-------------- 2 files changed, 105 insertions(+), 32 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index bcad50d4..6c969791 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -626,6 +626,47 @@ done .false )) + (declare (assoc + neighbors_map + ||(map + (lambda + (compute_on_contained_entities + (query_not_in_entity_list [(current_index 1)]) + (query_nearest_generalized_distance + k_parameter + features + (current_index) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + dt_parameter + distribute_weight_feature + (rand) + (null) ;radius + !numericalPrecision + ) + ) + ) + (zip all_case_ids) + ) + )) + + ;blur weights among neighbors + (call !DistributeCaseInfluenceWeights (assoc + case_ids all_case_ids + redistribute_weights_map neighbors_map + has_rebalance_features .false + )) + ;second pass + (call !DistributeCaseInfluenceWeights (assoc + case_ids all_case_ids + redistribute_weights_map neighbors_map + has_rebalance_features .false + )) + ;mark each case as not being kept at first (map (lambda diff --git a/howso/update_cases.amlg b/howso/update_cases.amlg index 83bcda29..ea59414b 100644 --- a/howso/update_cases.amlg +++ b/howso/update_cases.amlg @@ -829,6 +829,7 @@ case_ids (list) distribute_weight_feature ".case_weight" has_rebalance_features .false + redistribute_weights_map (null) ) (declare (assoc original_distribute_weight_feature distribute_weight_feature)) @@ -856,32 +857,45 @@ (lambda (let (assoc ;case weight value that needs to be distributed among the neighbors - case_weight (or (get (current_value 1) distribute_weight_feature) 1) + case_weight (get (current_value 1) distribute_weight_feature) + ) + + ;if case_weight is undefined, default it to 1 + (if (= (null) case_weight) + (assign (assoc case_weight 1)) + + ;if case has a weight of zero, skip it + (= 0 case_weight) + (conclude [0 {}]) ) (declare (assoc ;map of case_id -> weight closest_cases_map - (compute_on_contained_entities - ;don't consider cases whose weights should be distributed, since they are all about to be removed - (query_not_in_entity_list case_ids) - (query_nearest_generalized_distance - (get hyperparam_map "k") - (replace features) - ;case id - (current_index 1) - (get hyperparam_map "p") - (get hyperparam_map "featureWeights") - !queryDistanceTypeMap - (get hyperparam_map "featureDomainAttributes") - (get hyperparam_map "featureDeviations") - (null) - (get hyperparam_map "dt") - original_distribute_weight_feature - ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") - "fixed rand seed" - (null) ;radius - !numericalPrecision + (if redistribute_weights_map + (get redistribute_weights_map (current_index 1)) + + (compute_on_contained_entities + ;don't consider cases whose weights should be distributed, since they are all about to be removed + (query_not_in_entity_list case_ids) + (query_nearest_generalized_distance + (get hyperparam_map "k") + (replace features) + ;case id + (current_index 1) + (get hyperparam_map "p") + (get hyperparam_map "featureWeights") + !queryDistanceTypeMap + (get hyperparam_map "featureDomainAttributes") + (get hyperparam_map "featureDeviations") + (null) + (get hyperparam_map "dt") + original_distribute_weight_feature + ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") + "fixed rand seed" + (null) ;radius + !numericalPrecision + ) ) ) )) @@ -985,6 +999,22 @@ )) ) + ;else redistributing weights to neighbors, by setting the weight directly (not accumulating) + ;and set weight 0 if none is to be redistributed + (size redistribute_weights_map) + ||(map + (lambda + (assign_to_entities (current_index) (associate + distribute_weight_feature (+ (or (last (current_value 1)))) + )) + ) + (zip case_ids) + ;reduce all the closest cases maps into one map of individual case -> total accumulated weight + (call !ReduceAssocsAddValues (assoc + list_of_assocs (map (lambda (last (current_value))) (values distributed_cases_maps)) + )) + ) + ;else no rebalance features, distribute the corresponding portion of this case's weight based on the neighbor's influence ||(map (lambda @@ -997,17 +1027,19 @@ ) ) - ;add the weight accumulated to each case to !dataMassChangeSinceLastAnalyze to ensure that cases trained as - ; only weights (whether through auto-ablation or otherwise) contribute to the progress towards the next auto-analyze, - ; if enabled. - (accum_to_entities (assoc - !dataMassChangeSinceLastAnalyze - ;sum of all case_weight values - (apply "+" (map - (lambda (first (current_value))) - (values distributed_cases_maps) - )) - )) + (if (= (null) redistribute_weights_map) + ;add the weight accumulated to each case to !dataMassChangeSinceLastAnalyze to ensure that cases trained as + ; only weights (whether through auto-ablation or otherwise) contribute to the progress towards the next auto-analyze, + ; if enabled. + (accum_to_entities (assoc + !dataMassChangeSinceLastAnalyze + ;sum of all case_weight values + (apply "+" (map + (lambda (first (current_value))) + (values distributed_cases_maps) + )) + )) + ) ) ;Helper method to reduce a list of assocs into one assoc with all the values summed up. From f99543d1931b5eb87bedcde16087ff017e0154e5 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Fri, 16 Jan 2026 15:48:27 -0500 Subject: [PATCH 15/39] added comment --- howso/ablation.amlg | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 626621d8..1466ce7b 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -663,7 +663,7 @@ ;second pass (call !DistributeCaseInfluenceWeights (assoc case_ids all_case_ids - redistribute_weights_map neighbors_map + redistribute_weights_map neighbors_map ;TODO: does this need to be recomputed after each blur pass? has_rebalance_features .false )) @@ -677,7 +677,6 @@ all_case_ids ) - #!ReduceComputeNeighborSurprisals (let (assoc From b8c4aea90e95144c526783f5337f6c0cbbc6be3a Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Fri, 16 Jan 2026 15:53:28 -0500 Subject: [PATCH 16/39] switch to min mode --- howso/ablation.amlg | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 1466ce7b..5b2371c9 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -710,7 +710,7 @@ (declare (assoc ;map of case to its core-set surprisal (the max surprisal to any case in the coreset for all cases) - case_to_css_map (map (lambda (+ 0)) (zip all_case_ids)) + case_to_css_map (map (lambda (+ .infinity)) (zip all_case_ids)) )) ;experimental params @@ -720,7 +720,7 @@ lowest_ns_cases_trunc_n (null) ;number of cases to select for keeping per iteration - cases_to_keep_per_iter 1 + cases_to_keep_per_iter 10 )) (while (not done) @@ -798,10 +798,10 @@ (declare (assoc new_case_css_map - (map + ||(map (lambda ;get their max surprisal to any of the cases_to_add - (apply "max" (values + (apply "min" (values (compute_on_contained_entities (query_in_entity_list cases_to_add) (query_within_generalized_distance @@ -833,7 +833,7 @@ ;take max of new max css and old max css (map (lambda - (max (current_value) (get case_to_css_map (current_index))) + (min (current_value) (get case_to_css_map (current_index))) ) new_case_css_map ) From 6dd2e651eb7d540d18c5f8dab54e74a9751c2b70 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Fri, 16 Jan 2026 16:32:49 -0500 Subject: [PATCH 17/39] remove zero weight and duplicates --- howso/ablation.amlg | 82 ++++++++++++++++++++++++++++++----------- howso/update_cases.amlg | 17 +++++++++ 2 files changed, 78 insertions(+), 21 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 1466ce7b..c1a85227 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -655,15 +655,71 @@ )) ;blur weights among neighbors - (call !DistributeCaseInfluenceWeights (assoc - case_ids all_case_ids - redistribute_weights_map neighbors_map - has_rebalance_features .false + (declare (assoc + duplicates + (call !DistributeCaseInfluenceWeights (assoc + case_ids all_case_ids + redistribute_weights_map neighbors_map + has_rebalance_features .false + )) )) + + ;merge duplicates if any exist + (if (size duplicates) + (call !ReduceMergeDuplicateCases (assoc + all_duplicate_cases_map (zip duplicates) + )) + ) + + (declare (assoc zero_weight_cases (contained_entities (query_equals distribute_weight_feature 0)) )) + ;remove zero-weight cases + (if (size zero_weight_cases) + (call !RemoveCases (assoc + cases zero_weight_cases + ;weight has already been distributed above during the first blur stop, don't do it again + distribute_weight_feature (null) + )) + ) + + ;dataset has been modified due to removal of dupes and zero weight cases + (if (or (size zero_weight_cases) (size duplicates)) + (seq + (assign (assoc all_case_ids (call !AllCases) )) + + (assign (assoc + neighbors_map + ||(map + (lambda + (compute_on_contained_entities + (query_not_in_entity_list [(current_index 1)]) + (query_nearest_generalized_distance + k_parameter + features + (current_index) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + dt_parameter + distribute_weight_feature ;TODO: should this be taken into account? + (rand) + (null) ;radius + !numericalPrecision + ) + ) + ) + (zip all_case_ids) + ) + )) + ) + ) + ;second pass (call !DistributeCaseInfluenceWeights (assoc case_ids all_case_ids - redistribute_weights_map neighbors_map ;TODO: does this need to be recomputed after each blur pass? + redistribute_weights_map neighbors_map has_rebalance_features .false )) @@ -1001,22 +1057,6 @@ )) duplicate_neighbors_map ) - - ;recompute influence weight entropy for the remaining no-longer duplicates - (declare (assoc - cases_too_far_map - (call !ComputeAndStoreInfluenceWeightEntropies (assoc - features features - weight_feature distribute_weight_feature - use_case_weights .true - compute_all .true - specific_case_ids (indices duplicate_neighbors_map) - )) - )) - - (if (size cases_too_far_map) - (accum (assoc case_duplicate_or_far_map cases_too_far_map)) - ) ) diff --git a/howso/update_cases.amlg b/howso/update_cases.amlg index ea59414b..07dffecc 100644 --- a/howso/update_cases.amlg +++ b/howso/update_cases.amlg @@ -846,6 +846,7 @@ )) ;default value of 1 for the accumulate_weight_feature new_weight_label_and_value (zip_labels (list distribute_weight_feature) (list 1)) + duplicates [] )) ;ensure the weight feature isn't among the features being used to find cases for distribution @@ -909,6 +910,10 @@ closest_cases_map (map 1 (filter (lambda (= (current_value) .infinity)) closest_cases_map) ) )) (assign (assoc total_influence (apply "+" (values closest_cases_map)) )) + + (if redistribute_weights_map + (accum (assoc duplicates (current_index 1))) + ) ) ;all cases are equally too distant, set their influence to be same @@ -917,6 +922,15 @@ closest_cases_map (map 1 closest_cases_map) total_influence (size closest_cases_map) )) + + ;if redistributing weights and this case is a duplicate, add it to the list of duplicates + (!= (null) redistribute_weights_map) + (if (and + (= "surprisal_to_prob" (get hyperparam_map "dt")) + (contains_value closest_cases_map 1) + ) + (accum (assoc duplicates (current_index 1))) + ) ) ;output pairs of: [ case_weight, distributed weight closest_cases_map] @@ -1040,6 +1054,9 @@ )) )) ) + + ;output list of duplicates + duplicates ) ;Helper method to reduce a list of assocs into one assoc with all the values summed up. From 902e976eed8dfa8070b4c209902fac9883e1a421 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Fri, 16 Jan 2026 16:42:45 -0500 Subject: [PATCH 18/39] updating comments and such --- howso/ablation.amlg | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 868602ff..85a3cc73 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -605,9 +605,6 @@ )) ) - ;START facility something algo - ;approximate_num_cases_to_keep is the amount of cases to select - (declare (assoc hyperparam_map (call !GetHyperparameters (assoc weight_feature distribute_weight_feature)) )) @@ -776,7 +773,7 @@ lowest_ns_cases_trunc_n (null) ;number of cases to select for keeping per iteration - cases_to_keep_per_iter 10 + cases_to_keep_per_iter 5 )) (while (not done) @@ -856,7 +853,7 @@ new_case_css_map ||(map (lambda - ;get their max surprisal to any of the cases_to_add + ;get their min surprisal to any of the cases_to_add (apply "min" (values (compute_on_contained_entities (query_in_entity_list cases_to_add) @@ -886,7 +883,7 @@ (assign (assoc case_to_css_map - ;take max of new max css and old max css + ;take min of new min css and old min css (map (lambda (min (current_value) (get case_to_css_map (current_index))) From df2eb1d904bc47aeaacdf739819608bd4abc0314 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Sun, 18 Jan 2026 10:43:34 -0500 Subject: [PATCH 19/39] comment out blurring, add performance hack --- howso/ablation.amlg | 224 +++++++++++++++++++++++--------------------- 1 file changed, 118 insertions(+), 106 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 85a3cc73..0911737d 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -623,14 +623,113 @@ done .false )) + ; (declare (assoc + ; neighbors_map + ; ||(map + ; (lambda + ; (compute_on_contained_entities + ; (query_not_in_entity_list [(current_index 1)]) + ; (query_nearest_generalized_distance + ; k_parameter + ; features + ; (current_index) + ; p_parameter + ; feature_weights + ; !queryDistanceTypeMap + ; query_feature_attributes_map + ; feature_deviations + ; (null) + ; dt_parameter + ; distribute_weight_feature + ; (rand) + ; (null) ;radius + ; !numericalPrecision + ; ) + ; ) + ; ) + ; (zip all_case_ids) + ; ) + ; )) + + ; ;blur weights among neighbors + ; (declare (assoc + ; duplicates + ; (call !DistributeCaseInfluenceWeights (assoc + ; case_ids all_case_ids + ; redistribute_weights_map neighbors_map + ; has_rebalance_features .false + ; )) + ; )) + + ; ;merge duplicates if any exist + ; (if (size duplicates) + ; (call !ReduceMergeDuplicateCases (assoc + ; all_duplicate_cases_map (zip duplicates) + ; )) + ; ) + + ; (declare (assoc zero_weight_cases (contained_entities (query_equals distribute_weight_feature 0)) )) + ; ;remove zero-weight cases + ; (if (size zero_weight_cases) + ; (call !RemoveCases (assoc + ; cases zero_weight_cases + ; ;weight has already been distributed above during the first blur stop, don't do it again + ; distribute_weight_feature (null) + ; )) + ; ) + + ; ;dataset has been modified due to removal of dupes and zero weight cases + ; (if (or (size zero_weight_cases) (size duplicates)) + ; (seq + ; (assign (assoc all_case_ids (call !AllCases) )) + + ; (assign (assoc + ; neighbors_map + ; ||(map + ; (lambda + ; (compute_on_contained_entities + ; (query_not_in_entity_list [(current_index 1)]) + ; (query_nearest_generalized_distance + ; k_parameter + ; features + ; (current_index) + ; p_parameter + ; feature_weights + ; !queryDistanceTypeMap + ; query_feature_attributes_map + ; feature_deviations + ; (null) + ; dt_parameter + ; distribute_weight_feature ;TODO: should this be taken into account? + ; (rand) + ; (null) ;radius + ; !numericalPrecision + ; ) + ; ) + ; ) + ; (zip all_case_ids) + ; ) + ; )) + ; ) + ; ) + + ; ;second pass + ; (call !DistributeCaseInfluenceWeights (assoc + ; case_ids all_case_ids + ; redistribute_weights_map neighbors_map + ; has_rebalance_features .false + ; )) + + ;cache surprisals to the nearest 100 for each case + ;any cases that aren't in their most similar 100 can be considered to be 'too far away' (declare (assoc - neighbors_map + neighbor_surprisals_map ||(map (lambda (compute_on_contained_entities (query_not_in_entity_list [(current_index 1)]) (query_nearest_generalized_distance - k_parameter + 100 features (current_index) p_parameter @@ -639,7 +738,7 @@ query_feature_attributes_map feature_deviations (null) - dt_parameter + (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) distribute_weight_feature (rand) (null) ;radius @@ -651,75 +750,6 @@ ) )) - ;blur weights among neighbors - (declare (assoc - duplicates - (call !DistributeCaseInfluenceWeights (assoc - case_ids all_case_ids - redistribute_weights_map neighbors_map - has_rebalance_features .false - )) - )) - - ;merge duplicates if any exist - (if (size duplicates) - (call !ReduceMergeDuplicateCases (assoc - all_duplicate_cases_map (zip duplicates) - )) - ) - - (declare (assoc zero_weight_cases (contained_entities (query_equals distribute_weight_feature 0)) )) - ;remove zero-weight cases - (if (size zero_weight_cases) - (call !RemoveCases (assoc - cases zero_weight_cases - ;weight has already been distributed above during the first blur stop, don't do it again - distribute_weight_feature (null) - )) - ) - - ;dataset has been modified due to removal of dupes and zero weight cases - (if (or (size zero_weight_cases) (size duplicates)) - (seq - (assign (assoc all_case_ids (call !AllCases) )) - - (assign (assoc - neighbors_map - ||(map - (lambda - (compute_on_contained_entities - (query_not_in_entity_list [(current_index 1)]) - (query_nearest_generalized_distance - k_parameter - features - (current_index) - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - dt_parameter - distribute_weight_feature ;TODO: should this be taken into account? - (rand) - (null) ;radius - !numericalPrecision - ) - ) - ) - (zip all_case_ids) - ) - )) - ) - ) - - ;second pass - (call !DistributeCaseInfluenceWeights (assoc - case_ids all_case_ids - redistribute_weights_map neighbors_map - has_rebalance_features .false - )) - ;mark each case as not being kept at first (map (lambda @@ -763,7 +793,7 @@ (declare (assoc ;map of case to its core-set surprisal (the max surprisal to any case in the coreset for all cases) - case_to_css_map (map (lambda (+ .infinity)) (zip all_case_ids)) + case_to_css_map (map .infinity (zip all_case_ids)) )) ;experimental params @@ -783,7 +813,6 @@ (if (= (current_index 1) 0) ;on first iteration, just take lowest DC case (contained_entities - (query_exists !internalLabelSession) (query_equals "keeping" .false) (query_min ".neighbor_surprisal" 1 .true) ) @@ -793,7 +822,6 @@ (assoc lowest_ns_cases (contained_entities - (query_exists !internalLabelSession) (query_equals "keeping" .false) (if lowest_ns_cases_trunc_n (query_min ".neighbor_surprisal" lowest_ns_cases_trunc_n .true) @@ -842,42 +870,31 @@ ;mark new cases to keep (map (lambda - (assign_to_entities (current_value) (assoc - keeping .true - )) + (assign_to_entities (current_value) (assoc keeping .true )) ) cases_to_add ) + ;for each non-coreset case, determine surprisal to its closest case in the coreset (declare (assoc new_case_css_map ||(map (lambda - ;get their min surprisal to any of the cases_to_add - (apply "min" (values - (compute_on_contained_entities - (query_in_entity_list cases_to_add) - (query_within_generalized_distance - .infinity ;distance - features - (retrieve_from_entity (current_index) features) - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) - distribute_weight_feature - "fixed rand seed" - (null) ;radius - !numericalPrecision + (or + ;get the case's min surprisal to any of the cases_to_add, and if they are all too far away, + ;just output an exetremely large value + (apply "min" (values + (unzip + (get neighbor_surprisals_map (current_index)) + cases_to_add ) - ) - )) + )) + ;extremeley large value + 10e10 + ) ) ;all non-coreset cases - (zip (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false))) + (zip (contained_entities (query_equals "keeping" .false))) ) )) @@ -894,7 +911,6 @@ (if (>= (size (contained_entities - (query_exists !internalLabelSession) (query_equals "keeping" .true) )) !autoAblationMinNumCases @@ -911,11 +927,7 @@ (assign (assoc ;the list of case ids to be removed - cases - (contained_entities - (query_exists !internalLabelSession) - (query_equals "keeping" .false) - ) + cases (contained_entities (query_equals "keeping" .false) ) )) (if !tsTimeFeature From d0e43c1fe47b3654c0dd0e5866feaa755987c54d Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Mon, 19 Jan 2026 10:14:22 -0500 Subject: [PATCH 20/39] cleanup --- howso/ablation.amlg | 102 +------------------------------------------- 1 file changed, 1 insertion(+), 101 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 0911737d..ccfc606b 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -623,103 +623,6 @@ done .false )) - ; (declare (assoc - ; neighbors_map - ; ||(map - ; (lambda - ; (compute_on_contained_entities - ; (query_not_in_entity_list [(current_index 1)]) - ; (query_nearest_generalized_distance - ; k_parameter - ; features - ; (current_index) - ; p_parameter - ; feature_weights - ; !queryDistanceTypeMap - ; query_feature_attributes_map - ; feature_deviations - ; (null) - ; dt_parameter - ; distribute_weight_feature - ; (rand) - ; (null) ;radius - ; !numericalPrecision - ; ) - ; ) - ; ) - ; (zip all_case_ids) - ; ) - ; )) - - ; ;blur weights among neighbors - ; (declare (assoc - ; duplicates - ; (call !DistributeCaseInfluenceWeights (assoc - ; case_ids all_case_ids - ; redistribute_weights_map neighbors_map - ; has_rebalance_features .false - ; )) - ; )) - - ; ;merge duplicates if any exist - ; (if (size duplicates) - ; (call !ReduceMergeDuplicateCases (assoc - ; all_duplicate_cases_map (zip duplicates) - ; )) - ; ) - - ; (declare (assoc zero_weight_cases (contained_entities (query_equals distribute_weight_feature 0)) )) - ; ;remove zero-weight cases - ; (if (size zero_weight_cases) - ; (call !RemoveCases (assoc - ; cases zero_weight_cases - ; ;weight has already been distributed above during the first blur stop, don't do it again - ; distribute_weight_feature (null) - ; )) - ; ) - - ; ;dataset has been modified due to removal of dupes and zero weight cases - ; (if (or (size zero_weight_cases) (size duplicates)) - ; (seq - ; (assign (assoc all_case_ids (call !AllCases) )) - - ; (assign (assoc - ; neighbors_map - ; ||(map - ; (lambda - ; (compute_on_contained_entities - ; (query_not_in_entity_list [(current_index 1)]) - ; (query_nearest_generalized_distance - ; k_parameter - ; features - ; (current_index) - ; p_parameter - ; feature_weights - ; !queryDistanceTypeMap - ; query_feature_attributes_map - ; feature_deviations - ; (null) - ; dt_parameter - ; distribute_weight_feature ;TODO: should this be taken into account? - ; (rand) - ; (null) ;radius - ; !numericalPrecision - ; ) - ; ) - ; ) - ; (zip all_case_ids) - ; ) - ; )) - ; ) - ; ) - - ; ;second pass - ; (call !DistributeCaseInfluenceWeights (assoc - ; case_ids all_case_ids - ; redistribute_weights_map neighbors_map - ; has_rebalance_features .false - ; )) - ;cache surprisals to the nearest 100 for each case ;any cases that aren't in their most similar 100 can be considered to be 'too far away' (declare (assoc @@ -833,9 +736,7 @@ low_ns_case_scores (map (lambda - ;divide neighbor surprisal by coreset surprisal - - ;I think the LLM desc was wrong, so I flipped it to be coreset surprisal / neighbor surprisal + ;coreset surprisal / neighbor surprisal, the smaller the neighbor surprisal the larger this score (/ (get case_to_css_map (current_index)) (retrieve_from_entity (current_index) ".neighbor_surprisal") @@ -845,7 +746,6 @@ ) )) - ;sorting low dc cases by *decreasing* "score" and return the right amount (trunc (if (= 1 cases_to_keep_per_iter) From 4cb468b96625963bca58b8ba1e805022aa5d6eef Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Mon, 19 Jan 2026 10:40:51 -0500 Subject: [PATCH 21/39] update extremely large value --- howso/ablation.amlg | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 9d4037af..5ff4dff8 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -696,7 +696,8 @@ (declare (assoc ;map of case to its core-set surprisal (the max surprisal to any case in the coreset for all cases) - case_to_css_map (map .infinity (zip all_case_ids)) + ;set it to an extremely large value + case_to_css_map (map 10e13 (zip all_case_ids)) )) ;experimental params @@ -790,7 +791,7 @@ ) )) ;extremeley large value - 10e10 + 10e13 ) ) ;all non-coreset cases From c5507252a9dbd68bbccb62dc53589f4ca20ba53a Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Mon, 19 Jan 2026 13:49:51 -0500 Subject: [PATCH 22/39] batch size smarts --- howso/ablation.amlg | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 5ff4dff8..ba8557fa 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -707,7 +707,7 @@ lowest_ns_cases_trunc_n (null) ;number of cases to select for keeping per iteration - cases_to_keep_per_iter 5 + cases_to_keep_per_iter (min 30 (/ !autoAblationMinNumCases 100)) )) (while (not done) @@ -817,8 +817,6 @@ !autoAblationMinNumCases ) (assign (assoc done .true)) - - ; (call !ReduceComputeSmallestSurprisals) ) ) ) From 698ffca8a97ffe90ce724b1100c68f02b8a2dfc3 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:59:23 -0500 Subject: [PATCH 23/39] dynamic batch size, case cleanup, and more --- howso/ablation.amlg | 195 ++++++++++++++++++++++++++++---------------- 1 file changed, 123 insertions(+), 72 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index ba8557fa..04cc26ae 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -657,7 +657,7 @@ (map (lambda (accum_entity_roots (current_value) (zip_labels - ["keeping"] [.false] + [".keeping"] [.false] )) ) all_case_ids @@ -706,8 +706,17 @@ ;CAN BE NULLED lowest_ns_cases_trunc_n (null) - ;number of cases to select for keeping per iteration - cases_to_keep_per_iter (min 30 (/ !autoAblationMinNumCases 100)) + ;maximum number of cases to select for keeping per iteration + max_cases_to_keep_per_iter + (min + 50 + (ceil (/ !postReduceMaxCases 100)) + ) + + ;numeric value for the coreset-surprisal to neighbor-surprisal ratio + ;if the maximum ratio among non core-set features is lower than this value and + ;enough cases have already been selected, then selection is ended. + ratio_cutoff_value 5.0 )) (while (not done) @@ -717,7 +726,7 @@ (if (= (current_index 1) 0) ;on first iteration, just take lowest DC case (contained_entities - (query_equals "keeping" .false) + (query_equals ".keeping" .false) (query_min ".neighbor_surprisal" 1 .true) ) @@ -726,7 +735,7 @@ (assoc lowest_ns_cases (contained_entities - (query_equals "keeping" .false) + (query_equals ".keeping" .false) (if lowest_ns_cases_trunc_n (query_min ".neighbor_surprisal" lowest_ns_cases_trunc_n .true) ) @@ -747,10 +756,41 @@ ) )) + (declare (assoc + coreset_size (size (contained_entities (query_equals ".keeping" .true))) + )) + + (declare (assoc + cases_to_keep + (min + ;grow coreset in relation to its current size. + (max + 1 + (floor (/ coreset_size 10)) + ) + ;or use the maximum number of cases per iteration + max_cases_to_keep_per_iter + ;if approaching max size, select as many cases needed to reach limit. + (- !postReduceMaxCases coreset_size) + ) + )) + + ;if enough cases have been selected, and ratios have fallen below the cutoff, then end case selection + ;after this iteration. + (if (and + (>= coreset_size !autoAblationMinNumCases) + (< + (apply "max" (values low_ns_case_scores)) + ratio_cutoff_value + ) + ) + (assign (assoc done .true)) + ) + ;sorting low dc cases by *decreasing* "score" and return the right amount (trunc - (if (= 1 cases_to_keep_per_iter) - (index_max low_ns_case_scores) + (if (= 1 cases_to_keep) + (trunc (index_max low_ns_case_scores) 1) (sort (lambda @@ -762,7 +802,7 @@ lowest_ns_cases ) ) - cases_to_keep_per_iter + cases_to_keep ) ) ) @@ -771,7 +811,7 @@ ;mark new cases to keep (map (lambda - (assign_to_entities (current_value) (assoc keeping .true )) + (assign_to_entities (current_value) (assoc ".keeping" .true )) ) cases_to_add ) @@ -795,7 +835,7 @@ ) ) ;all non-coreset cases - (zip (contained_entities (query_equals "keeping" .false))) + (zip (contained_entities (query_equals ".keeping" .false))) ) )) @@ -812,86 +852,83 @@ (if (>= (size (contained_entities - (query_equals "keeping" .true) + (query_equals ".keeping" .true) )) - !autoAblationMinNumCases + !postReduceMaxCases ) (assign (assoc done .true)) ) ) ) - - ;END facility something algo - (assign (assoc ;the list of case ids to be removed - cases (contained_entities (query_equals "keeping" .false) ) + cases (contained_entities (query_equals ".keeping" .false) ) )) - (if !tsTimeFeature - (let - (assoc - entire_series_removal_id_queries - ;select those series identifiers where there will be less than 3 cases remaining after this removal pass - ;because these entire series should be removed at that point - (filter - (lambda - (< - (size (contained_entities - (query_not_in_entity_list cases) - ;(current_value) is in the format of (list (query_equals "series_feature_id" value) ... ) for all affected series ids - (current_value) - )) - 3 - ) + (if !tsTimeFeature + (let + (assoc + entire_series_removal_id_queries + ;select those series identifiers where there will be less than 3 cases remaining after this removal pass + ;because these entire series should be removed at that point + (filter + (lambda + (< + (size (contained_entities + (query_not_in_entity_list cases) + ;(current_value) is in the format of (list (query_equals "series_feature_id" value) ... ) for all affected series ids + (current_value) + )) + 3 ) - ;a list of affected series identifier queries for this batch of 'cases' - (call !GenerateUniqueSeriesQueries (assoc - series_id_features (get !tsFeaturesMap "series_id_features") - case_ids - ;of the selected cases, only keep those that were either the first or last case from a series - (append - (contained_entities - (query_in_entity_list cases) - (query_equals ".reverse_series_index" 0) - ) - (contained_entities - (query_in_entity_list cases) - (query_equals ".series_index" 0) - ) - ) - )) - ) - ) - - ;do not remove first (.series_index == 0) or last (.reverse_series_index == 0) cases for any series - (assign (assoc - cases - (contained_entities - (query_in_entity_list cases) - (query_not_equals ".reverse_series_index" 0) - (query_not_equals ".series_index" 0) ) - )) - - ;there were series that will need to be entirely removed, add all those series cases for removal - (if (size entire_series_removal_id_queries) - (accum (assoc - cases - (apply "append" (map - (lambda + ;a list of affected series identifier queries for this batch of 'cases' + (call !GenerateUniqueSeriesQueries (assoc + series_id_features (get !tsFeaturesMap "series_id_features") + case_ids + ;of the selected cases, only keep those that were either the first or last case from a series + (append + (contained_entities + (query_in_entity_list cases) + (query_equals ".reverse_series_index" 0) + ) (contained_entities - (query_not_in_entity_list cases) - (current_value) + (query_in_entity_list cases) + (query_equals ".series_index" 0) ) ) - entire_series_removal_id_queries - )) - )) - ) + )) + ) + ) + + ;do not remove first (.series_index == 0) or last (.reverse_series_index == 0) cases for any series + (assign (assoc + cases + (contained_entities + (query_in_entity_list cases) + (query_not_equals ".reverse_series_index" 0) + (query_not_equals ".series_index" 0) + ) + )) + + ;there were series that will need to be entirely removed, add all those series cases for removal + (if (size entire_series_removal_id_queries) + (accum (assoc + cases + (apply "append" (map + (lambda + (contained_entities + (query_not_in_entity_list cases) + (current_value) + ) + ) + entire_series_removal_id_queries + )) + )) ) ) + ) (if (size cases) (seq @@ -938,6 +975,20 @@ ) ) + ;remove added features + (map + (lambda (assign_entity_roots + (current_value) + (filter + (lambda + (not (contains_value ["#.keeping" "#.neighbor_surprisal"] (first (get_labels (current_value))) )) + ) + (retrieve_entity_root (current_value)) + ) + )) + (call !AllCases) + ) + ;if the number of cases has been reduced by 'e' or more, auto analyze if needed (if (< (call !GetNumTrainingCases) (/ num_cases 2.718281828459)) From 1043b5e054267337ed24fba435b272d889b743d0 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Mon, 19 Jan 2026 16:21:05 -0500 Subject: [PATCH 24/39] unnecessary trunc --- howso/ablation.amlg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 04cc26ae..ad7bab82 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -790,7 +790,7 @@ ;sorting low dc cases by *decreasing* "score" and return the right amount (trunc (if (= 1 cases_to_keep) - (trunc (index_max low_ns_case_scores) 1) + (index_max low_ns_case_scores) (sort (lambda From ced55b565cf0f68d220f07a9a55e94f306c79f37 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Mon, 19 Jan 2026 16:41:19 -0500 Subject: [PATCH 25/39] ensure reduce_max_cases is an int --- howso/ablation.amlg | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index ad7bab82..fb5fdd6f 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -569,6 +569,9 @@ output (assoc) )) + ;ensure it's an integer + (assign (assoc reduce_max_cases (floor reduce_max_cases) )) + ;Ensure that ablation is initialized before we begin. (if (not !hasPopulatedCaseWeight) (call !InitializeAutoAblation (assoc @@ -710,7 +713,7 @@ max_cases_to_keep_per_iter (min 50 - (ceil (/ !postReduceMaxCases 100)) + (ceil (/ reduce_max_cases 100)) ) ;numeric value for the coreset-surprisal to neighbor-surprisal ratio @@ -771,7 +774,7 @@ ;or use the maximum number of cases per iteration max_cases_to_keep_per_iter ;if approaching max size, select as many cases needed to reach limit. - (- !postReduceMaxCases coreset_size) + (- reduce_max_cases coreset_size) ) )) @@ -854,7 +857,7 @@ (size (contained_entities (query_equals ".keeping" .true) )) - !postReduceMaxCases + reduce_max_cases ) (assign (assoc done .true)) ) From d8cf3ace47a8755634497648117e102b22baf6f3 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 20 Jan 2026 09:37:18 -0500 Subject: [PATCH 26/39] fixes early out logic --- howso/ablation.amlg | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index fb5fdd6f..7299481b 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -578,17 +578,10 @@ weight_feature distribute_weight_feature )) ) - (declare (assoc - num_cases (call !GetNumTrainingCases) - - ;reduction will stop within batch_size of reduce_max_cases, so if the gap between - ;reduce_max_cases and !autoAblationMinNumCases (max and min) cases is larger than batch_size, - ;the number of cases that need to be kept is approximately: max - batch_size, but can't be less than min. - approximate_num_cases_to_keep (max (- reduce_max_cases batch_size) !autoAblationMinNumCases) - )) + (declare (assoc num_cases (call !GetNumTrainingCases) )) ;nothing needed to reduce since the dataset is already small enough - (if (>= approximate_num_cases_to_keep num_cases) + (if (>= (max reduce_max_cases !autoAblationMinNumCases) num_cases) (conclude (call !Return (assoc payload output)) ) From fc1ae9eb5157b8e26c751facd14dbabfb75e5cc6 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 20 Jan 2026 10:50:37 -0500 Subject: [PATCH 27/39] code cleanup, removed unused parameters --- howso/ablation.amlg | 193 ++++++++++++-------------------------------- 1 file changed, 52 insertions(+), 141 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 7299481b..f480ac85 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -531,24 +531,6 @@ ;stores the maximum number of cases that may remain after data is reduced ; default to the value stored within the Trainee via 'set_auto_ablation_params', which defaults to 50,000. reduce_max_cases !postReduceMaxCases - ;{ref "AblationThresholdMap"} - ;a map of measure names (any of the prediction stats) to a map of feature names to threshold value. - ; absolute thresholds will cause data reduction to stop when any of the measure values for any of - ; the features for which a threshold is defined go above the threshold (in the case of rmse and - ; mae) or below the threshold (otherwise). - abs_threshold_map (assoc) - ;{ref "AblationThresholdMap"} - ;a map of measure names (any of the prediction stats) to a map of feature names to threshold value. - ; delta thresholds will cause data reduction to stop when the delta between any of the measure values - ; for any of the features for which a threshold is defined and its previous value go above the threshold - ; (in the case of rmse and mae) or below the threshold (otherwise). - delta_threshold_map (assoc) - ;{ref "AblationThresholdMap"} - ;a map of measure names (any of the prediction stats) to a map of feature names to threshold value. - ; relative thresholds will cause data reduction to stop when the relative change between any of the - ; measure values for any of the features for which a threshold is defined and its previous value go - ; above the threshold (in the case of rmse and mae) or below the threshold (otherwise). - rel_threshold_map (assoc) ;{type "boolean"} ;skip auto analyzing as cases are removed skip_auto_analyze .false @@ -564,8 +546,6 @@ ;Declare variables for internal use. (declare (assoc cases (list) - prev_prediction_stats_map (assoc) - thresholds_enabled (or (size abs_threshold_map) (size delta_threshold_map) (size rel_threshold_map) ) output (assoc) )) @@ -587,20 +567,6 @@ ) ) - (if thresholds_enabled - (assign (assoc - prev_prediction_stats_map - (get - (call !CalculateFeatureResiduals (assoc - weight_feature distribute_weight_feature - use_case_weights .true - compute_all_statistics .true - )) - "prediction_stats" - ) - )) - ) - (declare (assoc hyperparam_map (call !GetHyperparameters (assoc weight_feature distribute_weight_feature)) )) @@ -612,9 +578,7 @@ dt_parameter (get hyperparam_map "dt") feature_deviations (get hyperparam_map "featureDeviations") query_feature_attributes_map (get hyperparam_map "featureDomainAttributes") - )) - (declare (assoc all_case_ids (call !AllCases) done .false )) @@ -659,46 +623,37 @@ all_case_ids ) - #!ReduceComputeNeighborSurprisals - (let - (assoc - case_neighbor_surprisal_map - (compute_on_contained_entities - ||(query_entity_distance_contributions - 1 - features - all_case_ids - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) - distribute_weight_feature - ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") - "fixed rand seed" - (null) ;radius - !numericalPrecision - ) + ;compute and store surprisal to each cases's most similar neighbor + (call !StoreCaseValues (assoc + case_values_map + (compute_on_contained_entities + ||(query_entity_distance_contributions + 1 + features + all_case_ids + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) + distribute_weight_feature + ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") + "fixed rand seed" + (null) ;radius + !numericalPrecision ) - ) - - (call !StoreCaseValues (assoc - case_values_map case_neighbor_surprisal_map - label_name ".neighbor_surprisal" - )) - ) + ) + label_name ".neighbor_surprisal" + )) (declare (assoc ;map of case to its core-set surprisal (the max surprisal to any case in the coreset for all cases) ;set it to an extremely large value case_to_css_map (map 10e13 (zip all_case_ids)) - )) - ;experimental params - (declare (assoc - ;the amount of lowest neighbor-surprisal cases to consider for keeping + ;Experimental parameter: the amount of lowest neighbor-surprisal cases to consider for keeping ;CAN BE NULLED lowest_ns_cases_trunc_n (null) @@ -784,21 +739,20 @@ ) ;sorting low dc cases by *decreasing* "score" and return the right amount - (trunc - (if (= 1 cases_to_keep) - (index_max low_ns_case_scores) - (sort - (lambda - (- - (get low_ns_case_scores (current_value 1)) - (get low_ns_case_scores (current_value)) - ) + (if (= 1 cases_to_keep) + (trunc (index_max low_ns_case_scores) 1) + + (sort + (lambda + (- + (get low_ns_case_scores (current_value 1)) + (get low_ns_case_scores (current_value)) ) - lowest_ns_cases ) + lowest_ns_cases + cases_to_keep ) - cases_to_keep ) ) ) @@ -812,22 +766,26 @@ cases_to_add ) - ;for each non-coreset case, determine surprisal to its closest case in the coreset - (declare (assoc - new_case_css_map + (assign (assoc + case_to_css_map + ;for every non-coreset case, store min of newly computed min css and old min css ||(map (lambda - (or - ;get the case's min surprisal to any of the cases_to_add, and if they are all too far away, - ;just output an exetremely large value - (apply "min" (values - (unzip - (get neighbor_surprisals_map (current_index)) - cases_to_add - ) - )) - ;extremeley large value - 10e13 + (min + ;for each non-coreset case, determine surprisal to its closest case in the coreset + (or + ;get the case's min surprisal to any of the cases_to_add, and if they are all too far away, + ;just output an exetremely large value + (apply "min" (values + (unzip + (get neighbor_surprisals_map (current_index)) + cases_to_add + ) + )) + ;extremeley large value + 10e13 + ) + (get case_to_css_map (current_index)) ) ) ;all non-coreset cases @@ -835,17 +793,6 @@ ) )) - (assign (assoc - case_to_css_map - ;take min of new min css and old min css - (map - (lambda - (min (current_value) (get case_to_css_map (current_index))) - ) - new_case_css_map - ) - )) - (if (>= (size (contained_entities (query_equals ".keeping" .true) @@ -932,42 +879,6 @@ cases cases distribute_weight_feature distribute_weight_feature )) - - (if thresholds_enabled - (let - (assoc - batch_threshold_info (null) - new_prediction_stats_map - (get - (call !CalculateFeatureResiduals (assoc - weight_feature distribute_weight_feature - use_case_weights .true - compute_all_statistics .true - )) - "prediction_stats" - ) - ) - (assign (assoc - batch_threshold_info - (call !CheckThresholds (assoc - abs_threshold_map abs_threshold_map - delta_threshold_map delta_threshold_map - rel_threshold_map rel_threshold_map - prev_prediction_stats_map prev_prediction_stats_map - new_prediction_stats_map new_prediction_stats_map - )) - )) - (if (apply "or" (values batch_threshold_info)) - (seq - (accum "output" ["threshold_info"] batch_threshold_info) - (conclude) - ) - (assign (assoc - prev_prediction_stats_map new_prediction_stats_map - )) - ) - ) - ) ) ) From 1d054cbce08c8a4ca6b56405ccca804dce1eecbc Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 20 Jan 2026 11:04:14 -0500 Subject: [PATCH 28/39] more cleanup --- howso/ablation.amlg | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index f480ac85..ea621d4f 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -739,7 +739,6 @@ ) ;sorting low dc cases by *decreasing* "score" and return the right amount - (if (= 1 cases_to_keep) (trunc (index_max low_ns_case_scores) 1) @@ -874,12 +873,10 @@ ) (if (size cases) - (seq - (call !RemoveCases (assoc - cases cases - distribute_weight_feature distribute_weight_feature - )) - ) + (call !RemoveCases (assoc + cases cases + distribute_weight_feature distribute_weight_feature + )) ) ;remove added features From a233fe44f017caaa94ef35a5aaab59fb1dfc5bc1 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 20 Jan 2026 11:20:44 -0500 Subject: [PATCH 29/39] update unit test --- howso/scale.amlg | 3 --- howso/train.amlg | 3 --- unit_tests/ut_h_reduce_data.amlg | 38 ++------------------------------ 3 files changed, 2 insertions(+), 42 deletions(-) diff --git a/howso/scale.amlg b/howso/scale.amlg index da7e762f..d60ba474 100644 --- a/howso/scale.amlg +++ b/howso/scale.amlg @@ -539,9 +539,6 @@ (size !hyperparameterMetadataMap) ) (call reduce_data (assoc - abs_threshold_map !autoAblationAbsThresholdMap - delta_threshold_map !autoAblationDeltaThresholdMap - rel_threshold_map !autoAblationRelThresholdMap skip_auto_analyze skip_auto_analyze )) diff --git a/howso/train.amlg b/howso/train.amlg index 64785a12..2ab7e2b3 100644 --- a/howso/train.amlg +++ b/howso/train.amlg @@ -982,9 +982,6 @@ ) (call reduce_data (assoc features features - abs_threshold_map !autoAblationAbsThresholdMap - delta_threshold_map !autoAblationDeltaThresholdMap - rel_threshold_map !autoAblationRelThresholdMap skip_auto_analyze skip_auto_analyze )) diff --git a/unit_tests/ut_h_reduce_data.amlg b/unit_tests/ut_h_reduce_data.amlg index bea1e481..79c7fd28 100644 --- a/unit_tests/ut_h_reduce_data.amlg +++ b/unit_tests/ut_h_reduce_data.amlg @@ -175,7 +175,7 @@ obs (get (call_entity "howso" "get_num_training_cases") (list 1 "payload" "count")) )) - (print "reduce_data with no thresholds reduces down to minimum model size: ") + (print "reduce_data reduces down to minimum model size: ") (call_entity "howso" "reduce_data") (call assert_same (assoc exp @@ -186,43 +186,9 @@ obs (get (call_entity "howso" "get_num_training_cases") (list 1 "payload" "count")) )) - (call exit_if_failures (assoc msg "reduce_data with no thresholds reduced as expected")) + (call exit_if_failures (assoc msg "reduce_data reduced as expected")) (call !EmptyAndTrain) - (print "reduce_data with very liberal thresholds reduces down to minimum model size: ") - (call_entity "howso" "reduce_data" (assoc - abs_threshold_map (assoc accuracy (assoc species 0.1)) - delta_threshold_map (assoc accuracy (assoc species 0.5)) - rel_threshold_map (assoc accuracy (assoc species 0.5)) - )) - (call assert_same (assoc - exp - (get - (call_entity "howso" "get_auto_ablation_params") - (list 1 "payload" "min_num_cases") - ) - obs - (get (call_entity "howso" "get_num_training_cases") (list 1 "payload" "count")) - )) - (call exit_if_failures (assoc msg "reduce_data with liberal thresholds reduced as expected")) - (call !EmptyAndTrain) - - (print "reduce_data with conservative thresholds reduces to above minimum model size: ") - (call_entity "howso" "reduce_data" (assoc - abs_threshold_map (assoc accuracy (assoc species 1)) - delta_threshold_map (assoc accuracy (assoc species 0.001)) - rel_threshold_map (assoc accuracy (assoc species 0.001)) - )) - (call assert_same (assoc - exp - (- - (size training_data) - (call_entity "howso""debug_label" (assoc label "!ablationBatchSize")) - ) - obs - (get (call_entity "howso" "get_num_training_cases") (list 1 "payload" "count")) - )) - (call exit_if_failures (assoc msg "reduce_data with conservative thresholds reduced as expected")) (print "a basic train automatically calls reduce_data when the threshold is low enough, resulting in min. model size w/ no threshold maps: ") (call !EmptyAndTrain (assoc enable_auto_ablation .true max_num_cases 120)) From ddabaed9d8ec76a630a93cf849b0e08a5d7bb6a5 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 20 Jan 2026 11:45:07 -0500 Subject: [PATCH 30/39] fix unit test, version bump --- unit_tests/ut_h_scale_ablation.amlg | 47 ++++++++++++----------------- version.json | 2 +- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/unit_tests/ut_h_scale_ablation.amlg b/unit_tests/ut_h_scale_ablation.amlg index 131107ea..aa634bcf 100644 --- a/unit_tests/ut_h_scale_ablation.amlg +++ b/unit_tests/ut_h_scale_ablation.amlg @@ -33,11 +33,12 @@ )) (print "set_auto_ablation_params\n") - (call_entity "howso" "set_auto_ablation_params" (assoc - auto_ablation_enabled .true - min_num_cases 100 - max_num_cases 200 - )) + (call_entity "howso" "set_auto_ablation_params" (assoc + auto_ablation_enabled .true + min_num_cases 100 + max_num_cases 200 + reduce_max_cases 100 + )) (print "train\n") (call_entity "howso" "train" (assoc @@ -63,32 +64,20 @@ obs (first first_pass_cases) exp 1 )) - (print "only '998' and cases under '100' remain: ") + (print "only 100 cases remain: ") (call assert_true (assoc - obs - (apply "and" - (map - (lambda (or (< (first (current_value)) 100) (= (first (current_value)) 998))) + obs (= + 100 + (size (get first_pass_cases (list 1 "payload" "cases")) ) ) )) - ;train some more cases with medium numbers. - - (print "train\n") - (call_entity "howso" "train" (assoc - cases (call !CreateSquareCases (assoc xs (range 400 498))) - features (list "x" "y") - session "unit_test" - )) - - ; set up training a specific number of medium-valued cases, anything near here was dropped in the first - ; reduce_data call, but we've loaded in some duplicate cases. - + ;would be payload for cases in the 400-500 range (declare (assoc train_payload (call_entity "howso" "compute_train_payload" (assoc - cases (call !CreateSquareCases (assoc xs (range 450 455))) + cases (call !CreateSquareCases (assoc xs (range 400 500))) features (list "x" "y") session "unit_test" )) @@ -100,12 +89,16 @@ )) ; With that payload in hand, let's reduce_data again. - + (call_entity "howso" "set_auto_ablation_params" (assoc + auto_ablation_enabled .true + min_num_cases 25 + max_num_cases 100 + reduce_max_cases 25 + )) (print "reduce_data\n") - (call_entity "howso" "reduce_data" (assoc)) - - ; This will again drop a lot of the medium-valued cases, so we're going to fail committing the payload. + (call_entity "howso" "reduce_data") + ; This will drop a lot of the cases we need, so we're going to fail committing the payload. (print "process_train_payload failed: ") (call assert_same (assoc obs (call_entity "howso" "process_train_payload" (get train_payload (list 1 "payload"))) diff --git a/version.json b/version.json index bac26318..1cc02027 100644 --- a/version.json +++ b/version.json @@ -1,6 +1,6 @@ { "version": "0.0.0", "dependencies": { - "amalgam": "71.0.4" + "amalgam": "71.0.5" } } From d89fee4cb048ac502dbbf5ec86f0a74a5832dcd0 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 20 Jan 2026 11:51:52 -0500 Subject: [PATCH 31/39] revert changes to update_cases --- howso/update_cases.amlg | 113 ++++++++++++---------------------------- 1 file changed, 32 insertions(+), 81 deletions(-) diff --git a/howso/update_cases.amlg b/howso/update_cases.amlg index 07dffecc..83bcda29 100644 --- a/howso/update_cases.amlg +++ b/howso/update_cases.amlg @@ -829,7 +829,6 @@ case_ids (list) distribute_weight_feature ".case_weight" has_rebalance_features .false - redistribute_weights_map (null) ) (declare (assoc original_distribute_weight_feature distribute_weight_feature)) @@ -846,7 +845,6 @@ )) ;default value of 1 for the accumulate_weight_feature new_weight_label_and_value (zip_labels (list distribute_weight_feature) (list 1)) - duplicates [] )) ;ensure the weight feature isn't among the features being used to find cases for distribution @@ -858,45 +856,32 @@ (lambda (let (assoc ;case weight value that needs to be distributed among the neighbors - case_weight (get (current_value 1) distribute_weight_feature) - ) - - ;if case_weight is undefined, default it to 1 - (if (= (null) case_weight) - (assign (assoc case_weight 1)) - - ;if case has a weight of zero, skip it - (= 0 case_weight) - (conclude [0 {}]) + case_weight (or (get (current_value 1) distribute_weight_feature) 1) ) (declare (assoc ;map of case_id -> weight closest_cases_map - (if redistribute_weights_map - (get redistribute_weights_map (current_index 1)) - - (compute_on_contained_entities - ;don't consider cases whose weights should be distributed, since they are all about to be removed - (query_not_in_entity_list case_ids) - (query_nearest_generalized_distance - (get hyperparam_map "k") - (replace features) - ;case id - (current_index 1) - (get hyperparam_map "p") - (get hyperparam_map "featureWeights") - !queryDistanceTypeMap - (get hyperparam_map "featureDomainAttributes") - (get hyperparam_map "featureDeviations") - (null) - (get hyperparam_map "dt") - original_distribute_weight_feature - ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") - "fixed rand seed" - (null) ;radius - !numericalPrecision - ) + (compute_on_contained_entities + ;don't consider cases whose weights should be distributed, since they are all about to be removed + (query_not_in_entity_list case_ids) + (query_nearest_generalized_distance + (get hyperparam_map "k") + (replace features) + ;case id + (current_index 1) + (get hyperparam_map "p") + (get hyperparam_map "featureWeights") + !queryDistanceTypeMap + (get hyperparam_map "featureDomainAttributes") + (get hyperparam_map "featureDeviations") + (null) + (get hyperparam_map "dt") + original_distribute_weight_feature + ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") + "fixed rand seed" + (null) ;radius + !numericalPrecision ) ) )) @@ -910,10 +895,6 @@ closest_cases_map (map 1 (filter (lambda (= (current_value) .infinity)) closest_cases_map) ) )) (assign (assoc total_influence (apply "+" (values closest_cases_map)) )) - - (if redistribute_weights_map - (accum (assoc duplicates (current_index 1))) - ) ) ;all cases are equally too distant, set their influence to be same @@ -922,15 +903,6 @@ closest_cases_map (map 1 closest_cases_map) total_influence (size closest_cases_map) )) - - ;if redistributing weights and this case is a duplicate, add it to the list of duplicates - (!= (null) redistribute_weights_map) - (if (and - (= "surprisal_to_prob" (get hyperparam_map "dt")) - (contains_value closest_cases_map 1) - ) - (accum (assoc duplicates (current_index 1))) - ) ) ;output pairs of: [ case_weight, distributed weight closest_cases_map] @@ -1013,22 +985,6 @@ )) ) - ;else redistributing weights to neighbors, by setting the weight directly (not accumulating) - ;and set weight 0 if none is to be redistributed - (size redistribute_weights_map) - ||(map - (lambda - (assign_to_entities (current_index) (associate - distribute_weight_feature (+ (or (last (current_value 1)))) - )) - ) - (zip case_ids) - ;reduce all the closest cases maps into one map of individual case -> total accumulated weight - (call !ReduceAssocsAddValues (assoc - list_of_assocs (map (lambda (last (current_value))) (values distributed_cases_maps)) - )) - ) - ;else no rebalance features, distribute the corresponding portion of this case's weight based on the neighbor's influence ||(map (lambda @@ -1041,22 +997,17 @@ ) ) - (if (= (null) redistribute_weights_map) - ;add the weight accumulated to each case to !dataMassChangeSinceLastAnalyze to ensure that cases trained as - ; only weights (whether through auto-ablation or otherwise) contribute to the progress towards the next auto-analyze, - ; if enabled. - (accum_to_entities (assoc - !dataMassChangeSinceLastAnalyze - ;sum of all case_weight values - (apply "+" (map - (lambda (first (current_value))) - (values distributed_cases_maps) - )) - )) - ) - - ;output list of duplicates - duplicates + ;add the weight accumulated to each case to !dataMassChangeSinceLastAnalyze to ensure that cases trained as + ; only weights (whether through auto-ablation or otherwise) contribute to the progress towards the next auto-analyze, + ; if enabled. + (accum_to_entities (assoc + !dataMassChangeSinceLastAnalyze + ;sum of all case_weight values + (apply "+" (map + (lambda (first (current_value))) + (values distributed_cases_maps) + )) + )) ) ;Helper method to reduce a list of assocs into one assoc with all the values summed up. From fae035a77cd8885fef38126f50dc07a6a77cdc04 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 20 Jan 2026 13:14:18 -0500 Subject: [PATCH 32/39] remove unused batch_size parameter --- howso/ablation.amlg | 3 --- 1 file changed, 3 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index ea621d4f..64e2ada1 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -534,9 +534,6 @@ ;{type "boolean"} ;skip auto analyzing as cases are removed skip_auto_analyze .false - ;{type "number"} - ;the number of cases to ablate between analyses and influence weight entropy recalculation - batch_size !ablationBatchSize ) (call !ValidateParameters) From 7eb6df6ce5584a1be244a565bdb6ebc260d7a39f Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 20 Jan 2026 14:28:15 -0500 Subject: [PATCH 33/39] removed ts rederivation after removal --- howso/remove_cases.amlg | 56 ----------------------------------------- 1 file changed, 56 deletions(-) diff --git a/howso/remove_cases.amlg b/howso/remove_cases.amlg index eb80c57e..09574af4 100644 --- a/howso/remove_cases.amlg +++ b/howso/remove_cases.amlg @@ -270,52 +270,6 @@ ) ) - (declare (assoc re_derivation_series_case_ids (null) )) - - ;check if time series dataset, if so, need to pull all cases from affected series so that their - ;derived features can be-rederived below after cases are removed - (if (and (!= (null) !tsTimeFeature) (size !derivedFeaturesMap)) - (let - (assoc series_id_features (get !tsFeaturesMap "series_id_features") ) - - ;for each case pull the list of series id values for all the series id features - (declare (assoc - series_id_lists - (values (map - (lambda (retrieve_from_entity (current_value) series_id_features)) - cases - ) .true) - )) - - ;for each series affected, get the cases so they can be re-derived - (declare (assoc - per_series_cases - (map - (lambda (let - (assoc - series_id_values (current_value 1) - ) - - (contained_entities - (values (map - (lambda - (query_equals (current_index) (current_value)) - ) - (zip series_id_features series_id_values) - )) - ) - )) - series_id_lists - ) - )) - - ;collapse all lists of cases into one list of unique case ids - (assign (assoc - re_derivation_series_case_ids (apply "append" per_series_cases) - )) - ) - ) - ;remove all the cases (apply "destroy_entities" cases) @@ -370,16 +324,6 @@ ;dataset has changed so clear out these cached value (call !ClearCachedCountsAndEntropies) - ;if there are cases that need to have features re-derived, do it here - (if re_derivation_series_case_ids - (call !DeriveTrainFeatures (assoc - features !trainedFeatures - ;keep and derive only those features that are not in the features list - derived_features (get !tsFeaturesMap "ts_derived_features") - case_ids re_derivation_series_case_ids - )) - ) - (call !UpdateHasNulls (assoc features !trainedFeatures)) (call !UpdateRegionalMinSize (assoc From d85aa3ee27d6ab05e1538e264a86c5058deb9378 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 20 Jan 2026 14:44:36 -0500 Subject: [PATCH 34/39] speedup for storing smallest surprisals --- howso/ablation.amlg | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 64e2ada1..d419bbbf 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -620,27 +620,12 @@ all_case_ids ) - ;compute and store surprisal to each cases's most similar neighbor + ;store the surprisal to each cases's most similar neighbor (call !StoreCaseValues (assoc case_values_map - (compute_on_contained_entities - ||(query_entity_distance_contributions - 1 - features - all_case_ids - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) - distribute_weight_feature - ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") - "fixed rand seed" - (null) ;radius - !numericalPrecision - ) + (map + (lambda (apply "min" (values (current_value)))) + neighbor_surprisals_map ) label_name ".neighbor_surprisal" )) From 3ace85ac000d7a0f661ba45f5fbefa49ec10b071 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 20 Jan 2026 14:52:42 -0500 Subject: [PATCH 35/39] version bump --- version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.json b/version.json index 1cc02027..24586f46 100644 --- a/version.json +++ b/version.json @@ -1,6 +1,6 @@ { "version": "0.0.0", "dependencies": { - "amalgam": "71.0.5" + "amalgam": "71.0.6" } } From 74d1ae18ec764516bc8c818d7472e3e513332174 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 20 Jan 2026 15:02:51 -0500 Subject: [PATCH 36/39] fix unit test --- unit_tests/ut_h_derive_start_end.amlg | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/unit_tests/ut_h_derive_start_end.amlg b/unit_tests/ut_h_derive_start_end.amlg index 11ed244f..0c903f84 100644 --- a/unit_tests/ut_h_derive_start_end.amlg +++ b/unit_tests/ut_h_derive_start_end.amlg @@ -139,7 +139,7 @@ )) - ;get the earlier case and check that its derived values are updated + ;get the earlier case and check that its derived values stay the same (assign (assoc result (get @@ -158,7 +158,7 @@ ) )) - (print "Validate case derived features are updated correctly: ") + (print "Validate case derived features don't change: ") (call assert_approximate (assoc obs mike_case_3 exp @@ -166,13 +166,13 @@ ".series_index" 2 ".reverse_series_index" 1 ".session_training_index" 4 - ".time_delta_1" 1800 - ".time_lag_1" 1600 + ".time_delta_1" 1400 + ".time_lag_1" 2000 "name" "mike" "time" 3400 ) )) - (call exit_if_failures (assoc msg "Re-derived features correctly after case removal.")) + (call exit_if_failures (assoc msg "derived features remain as they were after case removal.")) (call exit_if_failures (assoc msg unit_test_name )) ) \ No newline at end of file From 370806a41019f9333c37fc58d14ea39698621198 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Tue, 20 Jan 2026 15:10:12 -0500 Subject: [PATCH 37/39] ut fix --- unit_tests/ut_h_derive_start_end.amlg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unit_tests/ut_h_derive_start_end.amlg b/unit_tests/ut_h_derive_start_end.amlg index 0c903f84..eb0970f6 100644 --- a/unit_tests/ut_h_derive_start_end.amlg +++ b/unit_tests/ut_h_derive_start_end.amlg @@ -163,7 +163,7 @@ obs mike_case_3 exp (assoc - ".series_index" 2 + ".series_index" 3 ".reverse_series_index" 1 ".session_training_index" 4 ".time_delta_1" 1400 From c90b3066dab62eb66bc61a4535b45690aefe1fc1 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Tue, 20 Jan 2026 15:19:35 -0500 Subject: [PATCH 38/39] cap at 30 --- howso/ablation.amlg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index d419bbbf..60d4d611 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -642,7 +642,7 @@ ;maximum number of cases to select for keeping per iteration max_cases_to_keep_per_iter (min - 50 + 30 (ceil (/ reduce_max_cases 100)) ) From 007e9465c27f82bbe0efab4a10cf4b028d5291c2 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Tue, 20 Jan 2026 15:28:33 -0500 Subject: [PATCH 39/39] renames --- howso/ablation.amlg | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 60d4d611..865bf05c 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -542,7 +542,6 @@ ;Declare variables for internal use. (declare (assoc - cases (list) output (assoc) )) @@ -694,7 +693,7 @@ )) (declare (assoc - cases_to_keep + num_cases_to_keep (min ;grow coreset in relation to its current size. (max @@ -721,7 +720,7 @@ ) ;sorting low dc cases by *decreasing* "score" and return the right amount - (if (= 1 cases_to_keep) + (if (= 1 num_cases_to_keep) (trunc (index_max low_ns_case_scores) 1) (sort @@ -732,7 +731,7 @@ ) ) lowest_ns_cases - cases_to_keep + num_cases_to_keep ) ) ) @@ -785,9 +784,9 @@ ) ) - (assign (assoc + (declare (assoc ;the list of case ids to be removed - cases (contained_entities (query_equals ".keeping" .false) ) + cases_to_remove (contained_entities (query_equals ".keeping" .false) ) )) (if !tsTimeFeature @@ -800,7 +799,7 @@ (lambda (< (size (contained_entities - (query_not_in_entity_list cases) + (query_not_in_entity_list cases_to_remove) ;(current_value) is in the format of (list (query_equals "series_feature_id" value) ... ) for all affected series ids (current_value) )) @@ -814,11 +813,11 @@ ;of the selected cases, only keep those that were either the first or last case from a series (append (contained_entities - (query_in_entity_list cases) + (query_in_entity_list cases_to_remove) (query_equals ".reverse_series_index" 0) ) (contained_entities - (query_in_entity_list cases) + (query_in_entity_list cases_to_remove) (query_equals ".series_index" 0) ) ) @@ -828,9 +827,9 @@ ;do not remove first (.series_index == 0) or last (.reverse_series_index == 0) cases for any series (assign (assoc - cases + cases_to_remove (contained_entities - (query_in_entity_list cases) + (query_in_entity_list cases_to_remove) (query_not_equals ".reverse_series_index" 0) (query_not_equals ".series_index" 0) ) @@ -839,11 +838,11 @@ ;there were series that will need to be entirely removed, add all those series cases for removal (if (size entire_series_removal_id_queries) (accum (assoc - cases + cases_to_remove (apply "append" (map (lambda (contained_entities - (query_not_in_entity_list cases) + (query_not_in_entity_list cases_to_remove) (current_value) ) ) @@ -854,9 +853,9 @@ ) ) - (if (size cases) + (if (size cases_to_remove) (call !RemoveCases (assoc - cases cases + cases cases_to_remove distribute_weight_feature distribute_weight_feature )) )