diff --git a/howso/ablation.amlg b/howso/ablation.amlg
index 65735a11..865bf05c 100644
--- a/howso/ablation.amlg
+++ b/howso/ablation.amlg
@@ -531,30 +531,9 @@
             ;stores the maximum number of cases that may remain after data is reduced
             ; default to the value stored within the Trainee via 'set_auto_ablation_params', which defaults to 50,000.
             reduce_max_cases !postReduceMaxCases
-            ;{ref "AblationThresholdMap"}
-            ;a map of measure names (any of the prediction stats) to a map of feature names to threshold value.
-            ; absolute thresholds will cause data reduction to stop when any of the measure values for any of
-            ; the features for which a threshold is defined go above the threshold (in the case of rmse and
-            ; mae) or below the threshold (otherwise).
-            abs_threshold_map (assoc)
-            ;{ref "AblationThresholdMap"}
-            ;a map of measure names (any of the prediction stats) to a map of feature names to threshold value.
-            ; delta thresholds will cause data reduction to stop when the delta between any of the measure values
-            ; for any of the features for which a threshold is defined and its previous value go above the threshold
-            ; (in the case of rmse and mae) or below the threshold (otherwise).
-            delta_threshold_map (assoc)
-            ;{ref "AblationThresholdMap"}
-            ;a map of measure names (any of the prediction stats) to a map of feature names to threshold value.
-            ; relative thresholds will cause data reduction to stop when the relative change between any of the
-            ; measure values for any of the features for which a threshold is defined and its previous value go
-            ; above the threshold (in the case of rmse and mae) or below the threshold (otherwise).
-            rel_threshold_map (assoc)
             ;{type "boolean"}
             ;skip auto analyzing as cases are removed
             skip_auto_analyze .false
-            ;{type "number"}
-            ;the number of cases to ablate between analyses and influence weight entropy recalculation
-            batch_size !ablationBatchSize
         )

         (call !ValidateParameters)

@@ -563,286 +542,339 @@
         ;Declare variables for internal use.
         (declare (assoc
-            max_influence_weight_entropy_to_keep .infinity
-            cases (list)
-            prev_prediction_stats_map (assoc)
-            thresholds_enabled (or (size abs_threshold_map) (size delta_threshold_map) (size rel_threshold_map) )
             output (assoc)
         ))

+        ;ensure it's an integer
+        (assign (assoc reduce_max_cases (floor reduce_max_cases) ))
+
+        ;Ensure that ablation is initialized before we begin.
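+        ;(removing a case redistributes its case weight onto remaining neighbors via
+        ; 'distribute_weight_feature', so case weights must be populated before reduction starts)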
         (if (not !hasPopulatedCaseWeight)
             (call !InitializeAutoAblation (assoc
                 weight_feature distribute_weight_feature
             ))
         )

+        (declare (assoc num_cases (call !GetNumTrainingCases) ))

-        (declare (assoc
-            hyperparam_map
-                (call !GetHyperparameters (assoc
-                    context_features features
-                    weight_feature distribute_weight_feature
-                ))
+        ;no reduction is needed since the dataset is already small enough
+        (if (>= (max reduce_max_cases !autoAblationMinNumCases) num_cases)
+            (conclude
+                (call !Return (assoc payload output))
+            )
+        )
+
+        (declare (assoc
+            hyperparam_map (call !GetHyperparameters (assoc weight_feature distribute_weight_feature))
         ))
+
         (declare (assoc
-            closest_k (get hyperparam_map "k")
+            k_parameter (get hyperparam_map "k")
             p_parameter (get hyperparam_map "p")
-            dt_parameter (get hyperparam_map "dt")
             feature_weights (get hyperparam_map "featureWeights")
+            dt_parameter (get hyperparam_map "dt")
             feature_deviations (get hyperparam_map "featureDeviations")
             query_feature_attributes_map (get hyperparam_map "featureDomainAttributes")
-            num_cases (call !GetNumTrainingCases)
-            ;reduction will stop within batch_size of reduce_max_cases, so if the gap between
-            ;reduce_max_cases and !autoAblationMinNumCases (max and min) cases is larger than batch_size,
-            ;the number of cases that need to be kept is approximately: max - batch_size, but can't be less than min.
-            approximate_num_cases_to_keep (max (- reduce_max_cases batch_size) !autoAblationMinNumCases)
+            all_case_ids (call !AllCases)
+            done .false
         ))

-        (if thresholds_enabled
-            (assign (assoc
-                prev_prediction_stats_map
-                    (get
-                        (call !CalculateFeatureResiduals (assoc
-                            weight_feature distribute_weight_feature
-                            use_case_weights .true
-                            compute_all_statistics .true
-                        ))
-                        "prediction_stats"
-                    )
-            ))
-        )
-
-        ;pair of cases and associated sorted popularities (total normalized influence of all neighbors that referenced it)
+        ;cache each case's surprisals to its nearest 100 cases
+        ;any cases that aren't in their most similar 100 can be considered to be 'too far away'
         (declare (assoc
-            case_popularity_pair
-                (compute_on_contained_entities
-                    (query_exists !internalLabelSession)
-                    ||(query_entity_cumulative_nearest_entity_weights
-                        closest_k
-                        features
-                        (null) ;all cases
-                        p_parameter
-                        feature_weights
-                        !queryDistanceTypeMap
-                        query_feature_attributes_map
-                        feature_deviations
-                        (null)
-                        dt_parameter
-                        distribute_weight_feature
-                        (rand)
-                        (null) ;radius
-                        !numericalPrecision
-                        .true
+            neighbor_surprisals_map
+                ||(map
+                    (lambda
+                        (compute_on_contained_entities
+                            (query_not_in_entity_list [(current_index 1)])
+                            (query_nearest_generalized_distance
+                                100
+                                features
+                                (current_index)
+                                p_parameter
+                                feature_weights
+                                !queryDistanceTypeMap
+                                query_feature_attributes_map
+                                feature_deviations
+                                (null)
+                                (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1)
+                                distribute_weight_feature
+                                (rand)
+                                (null) ;radius
+                                !numericalPrecision
+                            )
+                        )
                     )
+                    (zip all_case_ids)
                 )
         ))

-        ;all the cases that were not returned in the pair above have 0 popularity (no other cases reference them)
-        (declare (assoc
-            zero_popularity_neighbors
-                (contained_entities
-                    (query_exists !internalLabelSession)
-                    (query_not_in_entity_list (first case_popularity_pair))
-                )
-        ))
+        ;mark each case as not being kept at first
+        (map
+            (lambda
+                (accum_entity_roots (current_value) (zip_labels
+                    [".keeping"] [.false]
+                ))
+            )
+            all_case_ids
+        )

-        ;determine the cutoff value of the popularity at which all cases with a value less than that should be removed
-        ;e.g., if there needs to be a quarter of cases left, this would compute the 0.75 quantile of popularity values,
-        ;so that those bottom 75% are removed
-        (declare (assoc
-            reduction_popularity_cutoff
-                (quantile
-                    (append
-                        (last case_popularity_pair)
-                        (range 0 1 (size zero_popularity_neighbors) 1)
-                    )
-                    ;add one percent to account for enough cases selected to match the amount needed to be removed due to rounding
-                    ;e.g., if the quantile value was 0.75 from the example above, this bumps it up to 0.76
-                    (+
-                        (/ (- num_cases approximate_num_cases_to_keep) num_cases)
-                        0.01
-                    )
+        ;store the surprisal to each case's most similar neighbor
+        (call !StoreCaseValues (assoc
+            case_values_map
+                (map
+                    (lambda (apply "min" (values (current_value))))
+                    neighbor_surprisals_map
                 )
+            label_name ".neighbor_surprisal"
         ))

-        ;plan to only remove cases whose popularity is less than reduction_popularity_cutoff
-        ;i.e., only remove the non-popular cases that aren't referenced by others as much
-        (declare (assoc
-            num_removal_eligible_cases
-                (size (filter
-                    (lambda (< (current_value) reduction_popularity_cutoff))
-                    (last case_popularity_pair)
-                ))
-        ))

+        (declare (assoc
-            ;case ids in order from highest to lowest popularity, lowest popularity at end of list
-            removable_cases
-                (append
-                    ;only keep the necessary number of lowest popularity eligible cases as well as all zero popularity ones
-                    (tail (first case_popularity_pair) num_removal_eligible_cases)
-                    zero_popularity_neighbors
+            ;map of case to its core-set surprisal (each case's surprisal to its closest case in the coreset)
+            ;set it to an extremely large value
+            case_to_css_map (map 10e13 (zip all_case_ids))
+
+            ;Experimental parameter: the number of lowest neighbor-surprisal cases to consider for keeping
+            ;CAN BE NULLED
+            lowest_ns_cases_trunc_n (null)
+
+            ;maximum number of cases to select for keeping per iteration
+            max_cases_to_keep_per_iter
+                (min
+                    30
+                    (ceil (/ reduce_max_cases 100))
                 )
-        ))

-        (declare (assoc
-            ;list will be sorted from highest to lowest, thus cases removed from the end of the list
-            end_index (- (size removable_cases) 1)
-            random_cases .false
-            num_removed_this_batch 0
+            ;numeric value for the coreset-surprisal to neighbor-surprisal ratio
+            ;if the maximum ratio among non-coreset cases is lower than this value and
+            ;enough cases have already been selected, then selection is ended.
+            ratio_cutoff_value 5.0
         ))

-        ;Begin looping on data removal. The ultimate end condition is if the dataset gets too small to continue removing cases.
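+        ;The loop below performs a greedy coreset selection: every case starts out unkept, and each
+        ;iteration keeps the unkept cases whose surprisal to the current coreset is largest relative
+        ;to the surprisal to their own nearest neighbor. Selection ends once reduce_max_cases cases
+        ;are kept, or once at least !autoAblationMinNumCases are kept and all remaining ratios fall
+        ;below ratio_cutoff_value; every case still unkept at that point is then removed.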
-        (while (< !autoAblationMinNumCases (call !GetNumTrainingCases))
-            (assign (assoc
-                num_removed_this_batch (min batch_size (- (call !GetNumTrainingCases) !autoAblationMinNumCases))
-            ))
-            (assign (assoc
-                cases
-                    (if (>= end_index 0)
-                        ;grab the cases from the end, with the smallest values
-                        (unzip
-                            removable_cases
-                            (range
-                                (max 0 (- end_index num_removed_this_batch -1))
-                                end_index
+        (while (not done)
+            (let
+                (assoc
+                    cases_to_add
+                        (if (= (current_index 1) 0)
+                            ;on first iteration, just take the case with the lowest neighbor surprisal
+                            (contained_entities
+                                (query_equals ".keeping" .false)
+                                (query_min ".neighbor_surprisal" 1 .true)
                             )
-                        )
-                        ;else select random cases
-                        (contained_entities
-                            (query_exists distribute_weight_feature)
-                            (query_select num_removed_this_batch (null) (rand) )
-                        )
-                    )
-            ))
+                            ;otherwise need cases with low neighbor surprisal (ns) that are far from their most similar kept case
+                            (let
+                                (assoc
+                                    lowest_ns_cases
+                                        (contained_entities
+                                            (query_equals ".keeping" .false)
+                                            (if lowest_ns_cases_trunc_n
+                                                (query_min ".neighbor_surprisal" lowest_ns_cases_trunc_n .true)
+                                            )
+                                        )
+                                )

-            (if (>= end_index 0)
-                ;update end index to account for the cases about to be removed
-                (assign (assoc end_index (- end_index (size cases)) ))
+                                (declare (assoc
+                                    low_ns_case_scores
+                                        (map
+                                            (lambda
+                                                ;coreset surprisal / neighbor surprisal, the smaller the neighbor surprisal the larger this score
+                                                (/
+                                                    (get case_to_css_map (current_index))
+                                                    (retrieve_from_entity (current_index) ".neighbor_surprisal")
+                                                )
+                                            )
+                                            (zip lowest_ns_cases)
+                                        )
+                                ))

-                ;else no more removable cases left, remove random cases
-                (assign (assoc random_cases .true))
-            )
+                                (declare (assoc
+                                    coreset_size (size (contained_entities (query_equals ".keeping" .true)))
+                                ))

-            (if !tsTimeFeature
-                (let
-                    (assoc
-                        entire_series_removal_id_queries
-                            ;select those series identifiers where there will be less than 3 cases remaining after this removal pass
-                            ;because these entire series should be removed at that point
-                            (filter
-                                (lambda
-                                    (<
-                                        (size (contained_entities
-                                            (query_not_in_entity_list cases)
-                                            ;(current_value) is in the format of (list (query_equals "series_feature_id" value) ... ) for all affected series ids
-                                            (current_value)
-                                        ))
-                                        3
+                                (declare (assoc
+                                    num_cases_to_keep
+                                        (min
+                                            ;grow coreset in relation to its current size.
+                                            (max
+                                                1
+                                                (floor (/ coreset_size 10))
+                                            )
+                                            ;or use the maximum number of cases per iteration
+                                            max_cases_to_keep_per_iter
+                                            ;if approaching max size, select only as many cases as needed to reach the limit.
+                                            (- reduce_max_cases coreset_size)
+                                        )
+                                ))
+
+                                ;if enough cases have been selected, and ratios have fallen below the cutoff, then end case selection
+                                ;after this iteration.
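+                                ;e.g., with ratio_cutoff_value at 5.0, a candidate whose closest kept case is at
+                                ;surprisal 2.0 but whose own nearest neighbor is at surprisal 0.5 scores
+                                ;2.0 / 0.5 = 4.0, which falls below the cutoff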
+                                (if (and
+                                        (>= coreset_size !autoAblationMinNumCases)
+                                        (<
+                                            (apply "max" (values low_ns_case_scores))
+                                            ratio_cutoff_value
+                                        )
+                                    )
+                                    (assign (assoc done .true))
+                                )

-                        ;a list of affected series identifier queries for this batch of 'cases'
-                        (call !GenerateUniqueSeriesQueries (assoc
-                            series_id_features (get !tsFeaturesMap "series_id_features")
-                            case_ids
-                                ;of the selected cases, only keep those that were either the first or last case from a series
-                                (append
-                                    (contained_entities
-                                        (query_in_entity_list cases)
-                                        (query_equals ".reverse_series_index" 0)
-                                    )
-                                    (contained_entities
-                                        (query_in_entity_list cases)
-                                        (query_equals ".series_index" 0)
                                     )
                                 )
-                        ))
                             )

+                                ;sort low ns cases by *decreasing* "score" and return the right number
+                                (if (= 1 num_cases_to_keep)
+                                    (trunc (index_max low_ns_case_scores) 1)
+
+                                    (sort
+                                        (lambda
+                                            (-
+                                                (get low_ns_case_scores (current_value 1))
+                                                (get low_ns_case_scores (current_value))
+                                            )
+                                        )
+                                        lowest_ns_cases
+                                        num_cases_to_keep
+                                    )
+                                )
+                            )
+                        )
+                )
+
+                ;mark new cases to keep
+                (map
+                    (lambda
+                        (assign_to_entities (current_value) (assoc ".keeping" .true ))
                     )
+                    cases_to_add
+                )

-                    ;do not remove first (.series_index == 0) or last (.reverse_series_index == 0) cases for any series
-                    (assign (assoc
-                        cases
-                            (contained_entities
-                                (query_in_entity_list cases)
-                                (query_not_equals ".reverse_series_index" 0)
-                                (query_not_equals ".series_index" 0)
+                (assign (assoc
+                    case_to_css_map
+                        ;for every non-coreset case, store min of newly computed min css and old min css
+                        ||(map
+                            (lambda
+                                (min
+                                    ;for each non-coreset case, determine surprisal to its closest case in the coreset
+                                    (or
+                                        ;get the case's min surprisal to any of the cases_to_add, and if they are all too far away,
+                                        ;just output an extremely large value
+                                        (apply "min" (values
+                                            (unzip
+                                                (get neighbor_surprisals_map (current_index))
+                                                cases_to_add
+                                            )
+                                        ))
+                                        ;extremely large value
+                                        10e13
+                                    )
+                                    (get case_to_css_map (current_index))
+                                )
                             )
-                    ))
+                            ;all non-coreset cases
+                            (zip (contained_entities (query_equals ".keeping" .false)))
+                        )
+                ))

-                    ;there were series that will need to be entirely removed, add all those series cases for removal
-                    (if (size entire_series_removal_id_queries)
-                        (accum (assoc
-                            cases
-                                (apply "append" (map
-                                    (lambda
-                                        (contained_entities
-                                            (query_not_in_entity_list cases)
-                                            (current_value)
-                                        )
-                                    )
-                                    entire_series_removal_id_queries
-                                ))
+                (if (>=
+                        (size (contained_entities
+                            (query_equals ".keeping" .true)
                         ))
+                        reduce_max_cases
                     )
+                    (assign (assoc done .true))
                 )
             )
+        )

-            (if (size cases)
-                (seq
-                    (call !RemoveCases (assoc
-                        cases cases
-                        distribute_weight_feature distribute_weight_feature
-                    ))
+        (declare (assoc
+            ;the list of case ids to be removed
+            cases_to_remove (contained_entities (query_equals ".keeping" .false) )
+        ))

-                    (if thresholds_enabled
-                        (let
-                            (assoc
-                                batch_threshold_info (null)
-                                new_prediction_stats_map
-                                    (get
-                                        (call !CalculateFeatureResiduals (assoc
-                                            weight_feature distribute_weight_feature
-                                            use_case_weights .true
-                                            compute_all_statistics .true
-                                        ))
-                                        "prediction_stats"
-                                    )
-                            )
-                            (assign (assoc
-                                batch_threshold_info
-                                    (call !CheckThresholds (assoc
-                                        abs_threshold_map abs_threshold_map
-                                        delta_threshold_map delta_threshold_map
-                                        rel_threshold_map rel_threshold_map
-                                        prev_prediction_stats_map prev_prediction_stats_map
-                                        new_prediction_stats_map new_prediction_stats_map
-                                    ))
-                            ))
-                            (if (apply "or" (values batch_threshold_info))
-                                (seq
-                                    (accum "output" ["threshold_info"] batch_threshold_info)
-                                    (conclude)
-                                )
-                                (assign (assoc
-                                    prev_prediction_stats_map new_prediction_stats_map
-                                ))
-                            )
-                        )
-                    )

+        (if !tsTimeFeature
+            (let
+                (assoc
+                    entire_series_removal_id_queries
+                        ;select those series identifiers where there will be less than 3 cases remaining after this removal pass
+                        ;because these entire series should be removed at that point
+                        (filter
+                            (lambda
+                                (<
+                                    (size (contained_entities
+                                        (query_not_in_entity_list cases_to_remove)
+                                        ;(current_value) is in the format of (list (query_equals "series_feature_id" value) ... ) for all affected series ids
+                                        (current_value)
+                                    ))
+                                    3
+                                )
+                            )
+                            ;a list of affected series identifier queries for this batch of 'cases'
+                            (call !GenerateUniqueSeriesQueries (assoc
+                                series_id_features (get !tsFeaturesMap "series_id_features")
+                                case_ids
+                                    ;of the selected cases, only keep those that were either the first or last case from a series
+                                    (append
+                                        (contained_entities
+                                            (query_in_entity_list cases_to_remove)
+                                            (query_equals ".reverse_series_index" 0)
+                                        )
+                                        (contained_entities
+                                            (query_in_entity_list cases_to_remove)
+                                            (query_equals ".series_index" 0)
+                                        )
+                                    )
+                            ))
+                        )
+                )

+                ;do not remove first (.series_index == 0) or last (.reverse_series_index == 0) cases for any series
+                (assign (assoc
+                    cases_to_remove
+                        (contained_entities
+                            (query_in_entity_list cases_to_remove)
+                            (query_not_equals ".reverse_series_index" 0)
+                            (query_not_equals ".series_index" 0)
+                        )
+                ))

+                ;if there are series that need to be entirely removed, add all of their cases for removal
+                (if (size entire_series_removal_id_queries)
+                    (accum (assoc
+                        cases_to_remove
+                            (apply "append" (map
+                                (lambda
+                                    (contained_entities
+                                        (query_not_in_entity_list cases_to_remove)
+                                        (current_value)
+                                    )
+                                )
+                                entire_series_removal_id_queries
+                            ))
+                    ))
+                )
+            )
+        )

+        (if (size cases_to_remove)
+            (call !RemoveCases (assoc
+                cases cases_to_remove
+                distribute_weight_feature distribute_weight_feature
+            ))
+        )

+        ;remove the temporary labels added during reduction
+        (map
+            (lambda (assign_entity_roots
+                (current_value)
+                (filter
+                    (lambda
+                        (not (contains_value ["#.keeping" "#.neighbor_surprisal"] (first (get_labels (current_value))) ))
+                    )
+                    (retrieve_entity_root (current_value))
+                )
+            ))
+            (call !AllCases)
+        )
+
+        ;if the number of cases has been reduced by a factor of 'e' or more, auto analyze if needed
         (if (< (call !GetNumTrainingCases) (/ num_cases 2.718281828459))
             (call !AutoAnalyzeIfNeeded (assoc

@@ -921,22 +953,6 @@
             ))
             duplicate_neighbors_map
         )
-
-        ;recompute influence weight entropy for the remaining no-longer duplicates
-        (declare (assoc
-            cases_too_far_map
-                (call !ComputeAndStoreInfluenceWeightEntropies (assoc
-                    features features
-                    weight_feature distribute_weight_feature
-                    use_case_weights .true
-                    compute_all .true
-                    specific_case_ids (indices duplicate_neighbors_map)
-                ))
-        ))
-
-        (if (size cases_too_far_map)
-            (accum (assoc case_duplicate_or_far_map cases_too_far_map))
-        )
     )

diff --git a/howso/remove_cases.amlg b/howso/remove_cases.amlg
index eb80c57e..09574af4 100644
--- a/howso/remove_cases.amlg
+++ b/howso/remove_cases.amlg
@@ -270,52 +270,6 @@
             )
         )

-    (declare (assoc re_derivation_series_case_ids (null) ))
-
-    ;check if time series dataset, if so, need to pull all cases from affected series so that their
-    ;derived features can be-rederived below after cases are removed
-    (if (and (!= (null) !tsTimeFeature) (size !derivedFeaturesMap))
-        (let
-            (assoc series_id_features (get !tsFeaturesMap "series_id_features") )
-
-            ;for each case pull the list of series id values for all the series id features
-            (declare (assoc
-                series_id_lists
-                    (values (map
-                        (lambda (retrieve_from_entity (current_value) series_id_features))
-                        cases
-                    ) .true)
-            ))
-
-            ;for each series affected, get the cases so they can be re-derived
-            (declare (assoc
-                per_series_cases
-                    (map
-                        (lambda (let
-                            (assoc
-                                series_id_values (current_value 1)
-                            )
-
-                            (contained_entities
-                                (values (map
-                                    (lambda
-                                        (query_equals (current_index) (current_value))
-                                    )
-                                    (zip series_id_features series_id_values)
-                                ))
-                            )
-                        ))
-                        series_id_lists
-                    )
-            ))
-
-            ;collapse all lists of cases into one list of unique case ids
-            (assign (assoc
-                re_derivation_series_case_ids (apply "append" per_series_cases)
-            ))
-        )
-    )
-
     ;remove all the cases
     (apply "destroy_entities" cases)

@@ -370,16 +324,6 @@
     ;dataset has changed so clear out these cached value
     (call !ClearCachedCountsAndEntropies)

-    ;if there are cases that need to have features re-derived, do it here
-    (if re_derivation_series_case_ids
-        (call !DeriveTrainFeatures (assoc
-            features !trainedFeatures
-            ;keep and derive only those features that are not in the features list
-            derived_features (get !tsFeaturesMap "ts_derived_features")
-            case_ids re_derivation_series_case_ids
-        ))
-    )
-
     (call !UpdateHasNulls (assoc features !trainedFeatures))

     (call !UpdateRegionalMinSize (assoc

diff --git a/howso/scale.amlg b/howso/scale.amlg
index da7e762f..d60ba474 100644
--- a/howso/scale.amlg
+++ b/howso/scale.amlg
@@ -539,9 +539,6 @@
                 (size !hyperparameterMetadataMap)
             )
             (call reduce_data (assoc
-                abs_threshold_map !autoAblationAbsThresholdMap
-                delta_threshold_map !autoAblationDeltaThresholdMap
-                rel_threshold_map !autoAblationRelThresholdMap
                 skip_auto_analyze skip_auto_analyze
            ))

diff --git a/howso/train.amlg b/howso/train.amlg
index 64785a12..2ab7e2b3 100644
--- a/howso/train.amlg
+++ b/howso/train.amlg
@@ -982,9 +982,6 @@
             )
             (call reduce_data (assoc
                 features features
-                abs_threshold_map !autoAblationAbsThresholdMap
-                delta_threshold_map !autoAblationDeltaThresholdMap
-                rel_threshold_map !autoAblationRelThresholdMap
                 skip_auto_analyze skip_auto_analyze
             ))

diff --git a/unit_tests/ut_h_derive_start_end.amlg b/unit_tests/ut_h_derive_start_end.amlg
index 11ed244f..eb0970f6 100644
--- a/unit_tests/ut_h_derive_start_end.amlg
+++ b/unit_tests/ut_h_derive_start_end.amlg
@@ -139,7 +139,7 @@
     ))

-    ;get the earlier case and check that its derived values are updated
+    ;get the earlier case and check that its derived values stay the same
     (assign (assoc
         result
             (get

@@ -158,21 +158,21 @@
         )
     ))

-    (print "Validate case derived features are updated correctly: ")
+    (print "Validate case derived features don't change: ")
     (call assert_approximate (assoc
         obs mike_case_3
         exp
             (assoc
-                ".series_index" 2
+                ".series_index" 3
                 ".reverse_series_index" 1
                 ".session_training_index" 4
-                ".time_delta_1" 1800
-                ".time_lag_1" 1600
+                ".time_delta_1" 1400
+                ".time_lag_1" 2000
                 "name" "mike"
                 "time" 3400
             )
     ))
-    (call exit_if_failures (assoc msg "Re-derived features correctly after case removal."))
+    (call exit_if_failures (assoc msg "Derived features remain as they were after case removal."))

     (call exit_if_failures (assoc msg unit_test_name ))
 )
\ No newline at end of file

diff --git a/unit_tests/ut_h_reduce_data.amlg b/unit_tests/ut_h_reduce_data.amlg
index bea1e481..79c7fd28 100644
--- a/unit_tests/ut_h_reduce_data.amlg
+++ b/unit_tests/ut_h_reduce_data.amlg
@@ -175,7 +175,7 @@
         obs (get (call_entity "howso" "get_num_training_cases") (list 1 "payload" "count"))
     ))

-    (print "reduce_data with no thresholds reduces down to minimum model size: ")
"reduce_data with no thresholds reduces down to minimum model size: ") + (print "reduce_data reduces down to minimum model size: ") (call_entity "howso" "reduce_data") (call assert_same (assoc exp @@ -186,43 +186,9 @@ obs (get (call_entity "howso" "get_num_training_cases") (list 1 "payload" "count")) )) - (call exit_if_failures (assoc msg "reduce_data with no thresholds reduced as expected")) + (call exit_if_failures (assoc msg "reduce_data reduced as expected")) (call !EmptyAndTrain) - (print "reduce_data with very liberal thresholds reduces down to minimum model size: ") - (call_entity "howso" "reduce_data" (assoc - abs_threshold_map (assoc accuracy (assoc species 0.1)) - delta_threshold_map (assoc accuracy (assoc species 0.5)) - rel_threshold_map (assoc accuracy (assoc species 0.5)) - )) - (call assert_same (assoc - exp - (get - (call_entity "howso" "get_auto_ablation_params") - (list 1 "payload" "min_num_cases") - ) - obs - (get (call_entity "howso" "get_num_training_cases") (list 1 "payload" "count")) - )) - (call exit_if_failures (assoc msg "reduce_data with liberal thresholds reduced as expected")) - (call !EmptyAndTrain) - - (print "reduce_data with conservative thresholds reduces to above minimum model size: ") - (call_entity "howso" "reduce_data" (assoc - abs_threshold_map (assoc accuracy (assoc species 1)) - delta_threshold_map (assoc accuracy (assoc species 0.001)) - rel_threshold_map (assoc accuracy (assoc species 0.001)) - )) - (call assert_same (assoc - exp - (- - (size training_data) - (call_entity "howso""debug_label" (assoc label "!ablationBatchSize")) - ) - obs - (get (call_entity "howso" "get_num_training_cases") (list 1 "payload" "count")) - )) - (call exit_if_failures (assoc msg "reduce_data with conservative thresholds reduced as expected")) (print "a basic train automatically calls reduce_data when the threshold is low enough, resulting in min. model size w/ no threshold maps: ") (call !EmptyAndTrain (assoc enable_auto_ablation .true max_num_cases 120)) diff --git a/unit_tests/ut_h_scale_ablation.amlg b/unit_tests/ut_h_scale_ablation.amlg index 131107ea..aa634bcf 100644 --- a/unit_tests/ut_h_scale_ablation.amlg +++ b/unit_tests/ut_h_scale_ablation.amlg @@ -33,11 +33,12 @@ )) (print "set_auto_ablation_params\n") - (call_entity "howso" "set_auto_ablation_params" (assoc - auto_ablation_enabled .true - min_num_cases 100 - max_num_cases 200 - )) + (call_entity "howso" "set_auto_ablation_params" (assoc + auto_ablation_enabled .true + min_num_cases 100 + max_num_cases 200 + reduce_max_cases 100 + )) (print "train\n") (call_entity "howso" "train" (assoc @@ -63,32 +64,20 @@ obs (first first_pass_cases) exp 1 )) - (print "only '998' and cases under '100' remain: ") + (print "only 100 cases remain: ") (call assert_true (assoc - obs - (apply "and" - (map - (lambda (or (< (first (current_value)) 100) (= (first (current_value)) 998))) + obs (= + 100 + (size (get first_pass_cases (list 1 "payload" "cases")) ) ) )) - ;train some more cases with medium numbers. - - (print "train\n") - (call_entity "howso" "train" (assoc - cases (call !CreateSquareCases (assoc xs (range 400 498))) - features (list "x" "y") - session "unit_test" - )) - - ; set up training a specific number of medium-valued cases, anything near here was dropped in the first - ; reduce_data call, but we've loaded in some duplicate cases. 
- + ;would be payload for cases in the 400-500 range (declare (assoc train_payload (call_entity "howso" "compute_train_payload" (assoc - cases (call !CreateSquareCases (assoc xs (range 450 455))) + cases (call !CreateSquareCases (assoc xs (range 400 500))) features (list "x" "y") session "unit_test" )) @@ -100,12 +89,16 @@ )) ; With that payload in hand, let's reduce_data again. - + (call_entity "howso" "set_auto_ablation_params" (assoc + auto_ablation_enabled .true + min_num_cases 25 + max_num_cases 100 + reduce_max_cases 25 + )) (print "reduce_data\n") - (call_entity "howso" "reduce_data" (assoc)) - - ; This will again drop a lot of the medium-valued cases, so we're going to fail committing the payload. + (call_entity "howso" "reduce_data") + ; This will drop a lot of the cases we need, so we're going to fail committing the payload. (print "process_train_payload failed: ") (call assert_same (assoc obs (call_entity "howso" "process_train_payload" (get train_payload (list 1 "payload"))) diff --git a/version.json b/version.json index bac26318..24586f46 100644 --- a/version.json +++ b/version.json @@ -1,6 +1,6 @@ { "version": "0.0.0", "dependencies": { - "amalgam": "71.0.4" + "amalgam": "71.0.6" } }