Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
51049cc
grab random cases to remove
cademack Jan 13, 2026
495f44a
remove some unused stuff
cademack Jan 14, 2026
f815bf1
one last unused var
cademack Jan 14, 2026
5fbefc9
randomize test
howsoRes Jan 14, 2026
457700d
make it one pass
cademack Jan 14, 2026
f887770
crappy python translation
cademack Jan 14, 2026
67ee26d
fixed
cademack Jan 15, 2026
7448590
mismatch paren
cademack Jan 15, 2026
e3b1b93
impl
cademack Jan 15, 2026
ca752b8
updaters
cademack Jan 16, 2026
3a539a0
now testing this version
cademack Jan 16, 2026
022331a
nitpicking
cademack Jan 16, 2026
f967c97
optimization
cademack Jan 16, 2026
e5769dd
blur weights, 2 passes
howsoRes Jan 16, 2026
8d0781e
Merge branch 'weight-blur' into 24929-coreset-lit
howsoRes Jan 16, 2026
f99543d
added comment
howsoRes Jan 16, 2026
b8c4aea
switch to min mode
cademack Jan 16, 2026
6dd2e65
remove zero weight and duplicates
howsoRes Jan 16, 2026
cc57cc7
Merge branch 'weight-blur-2' into 24929-coreset-lit
howsoRes Jan 16, 2026
902e976
updating comments and such
cademack Jan 16, 2026
df2eb1d
comment out blurring, add performance hack
howsoRes Jan 18, 2026
d0e43c1
cleanup
howsoRes Jan 19, 2026
a2619d6
Merge branch 'main' into 24929-coreset-res
howsoRes Jan 19, 2026
4cb468b
update extremely large value
howsoRes Jan 19, 2026
c550725
batch size smarts
cademack Jan 19, 2026
698ffca
dynamic batch size, case cleanup, and more
cademack Jan 19, 2026
1043b5e
unnecessary trunc
cademack Jan 19, 2026
ced55b5
ensure reduce_max_cases is an int
howsoRes Jan 19, 2026
d8cf3ac
fixes early out logic
howsoRes Jan 20, 2026
fc1ae9e
code cleanup, removed unused parameters
howsoRes Jan 20, 2026
1d054cb
more cleanup
howsoRes Jan 20, 2026
a233fe4
update unit test
howsoRes Jan 20, 2026
eef5ca4
Merge branch 'main' into 24929-coreset-res
howsoRes Jan 20, 2026
ddabaed
fix unit test, version bump
howsoRes Jan 20, 2026
d89fee4
revert changes to update_cases
howsoRes Jan 20, 2026
fae035a
remove unused batch_size parameter
howsoRes Jan 20, 2026
7eb6df6
removed ts rederivation after removal
howsoRes Jan 20, 2026
d85aa3e
speedup for storing smallest surprisals
howsoRes Jan 20, 2026
3ace85a
version bump
howsoRes Jan 20, 2026
74d1ae1
fix unit test
howsoRes Jan 20, 2026
370806a
ut fix
howsoRes Jan 20, 2026
c90b306
cap at 30
cademack Jan 20, 2026
007e946
renames
cademack Jan 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
526 changes: 271 additions & 255 deletions howso/ablation.amlg

Large diffs are not rendered by default.

56 changes: 0 additions & 56 deletions howso/remove_cases.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -270,52 +270,6 @@
)
)

(declare (assoc re_derivation_series_case_ids (null) ))

;check if this is a time series dataset; if so, pull all cases from the affected series so that their
;derived features can be re-derived below after cases are removed
(if (and (!= (null) !tsTimeFeature) (size !derivedFeaturesMap))
(let
(assoc series_id_features (get !tsFeaturesMap "series_id_features") )

;for each case pull the list of series id values for all the series id features
(declare (assoc
series_id_lists
(values (map
(lambda (retrieve_from_entity (current_value) series_id_features))
cases
) .true)
))

;for each series affected, get the cases so they can be re-derived
(declare (assoc
per_series_cases
(map
(lambda (let
(assoc
series_id_values (current_value 1)
)

(contained_entities
(values (map
(lambda
(query_equals (current_index) (current_value))
)
(zip series_id_features series_id_values)
))
)
))
series_id_lists
)
))

;collapse all lists of cases into one list of unique case ids
(assign (assoc
re_derivation_series_case_ids (apply "append" per_series_cases)
))
)
)

;remove all the cases
(apply "destroy_entities" cases)

Expand Down Expand Up @@ -370,16 +324,6 @@
;dataset has changed so clear out these cached values
(call !ClearCachedCountsAndEntropies)

;if there are cases that need to have features re-derived, do it here
(if re_derivation_series_case_ids
(call !DeriveTrainFeatures (assoc
features !trainedFeatures
;keep and derive only those features that are not in the features list
derived_features (get !tsFeaturesMap "ts_derived_features")
case_ids re_derivation_series_case_ids
))
)

(call !UpdateHasNulls (assoc features !trainedFeatures))

(call !UpdateRegionalMinSize (assoc
Expand Down
3 changes: 0 additions & 3 deletions howso/scale.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -539,9 +539,6 @@
(size !hyperparameterMetadataMap)
)
(call reduce_data (assoc
abs_threshold_map !autoAblationAbsThresholdMap
delta_threshold_map !autoAblationDeltaThresholdMap
rel_threshold_map !autoAblationRelThresholdMap
skip_auto_analyze skip_auto_analyze
))

Expand Down
3 changes: 0 additions & 3 deletions howso/train.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -982,9 +982,6 @@
)
(call reduce_data (assoc
features features
abs_threshold_map !autoAblationAbsThresholdMap
delta_threshold_map !autoAblationDeltaThresholdMap
rel_threshold_map !autoAblationRelThresholdMap
skip_auto_analyze skip_auto_analyze
))

Expand Down
12 changes: 6 additions & 6 deletions unit_tests/ut_h_derive_start_end.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@
))


;get the earlier case and check that its derived values are updated
;get the earlier case and check that its derived values stay the same
(assign (assoc
result
(get
Expand All @@ -158,21 +158,21 @@
)
))

(print "Validate case derived features are updated correctly: ")
(print "Validate case derived features don't change: ")
(call assert_approximate (assoc
obs mike_case_3
exp
(assoc
".series_index" 2
".series_index" 3
".reverse_series_index" 1
".session_training_index" 4
".time_delta_1" 1800
".time_lag_1" 1600
".time_delta_1" 1400
".time_lag_1" 2000
"name" "mike"
"time" 3400
)
))
(call exit_if_failures (assoc msg "Re-derived features correctly after case removal."))
(call exit_if_failures (assoc msg "derived features remain as they were after case removal."))

(call exit_if_failures (assoc msg unit_test_name ))
)
38 changes: 2 additions & 36 deletions unit_tests/ut_h_reduce_data.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@
obs (get (call_entity "howso" "get_num_training_cases") (list 1 "payload" "count"))
))

(print "reduce_data with no thresholds reduces down to minimum model size: ")
(print "reduce_data reduces down to minimum model size: ")
(call_entity "howso" "reduce_data")
(call assert_same (assoc
exp
Expand All @@ -186,43 +186,9 @@
obs
(get (call_entity "howso" "get_num_training_cases") (list 1 "payload" "count"))
))
(call exit_if_failures (assoc msg "reduce_data with no thresholds reduced as expected"))
(call exit_if_failures (assoc msg "reduce_data reduced as expected"))
(call !EmptyAndTrain)

(print "reduce_data with very liberal thresholds reduces down to minimum model size: ")
(call_entity "howso" "reduce_data" (assoc
abs_threshold_map (assoc accuracy (assoc species 0.1))
delta_threshold_map (assoc accuracy (assoc species 0.5))
rel_threshold_map (assoc accuracy (assoc species 0.5))
))
(call assert_same (assoc
exp
(get
(call_entity "howso" "get_auto_ablation_params")
(list 1 "payload" "min_num_cases")
)
obs
(get (call_entity "howso" "get_num_training_cases") (list 1 "payload" "count"))
))
(call exit_if_failures (assoc msg "reduce_data with liberal thresholds reduced as expected"))
(call !EmptyAndTrain)

(print "reduce_data with conservative thresholds reduces to above minimum model size: ")
(call_entity "howso" "reduce_data" (assoc
abs_threshold_map (assoc accuracy (assoc species 1))
delta_threshold_map (assoc accuracy (assoc species 0.001))
rel_threshold_map (assoc accuracy (assoc species 0.001))
))
(call assert_same (assoc
exp
(-
(size training_data)
(call_entity "howso""debug_label" (assoc label "!ablationBatchSize"))
)
obs
(get (call_entity "howso" "get_num_training_cases") (list 1 "payload" "count"))
))
(call exit_if_failures (assoc msg "reduce_data with conservative thresholds reduced as expected"))

(print "a basic train automatically calls reduce_data when the threshold is low enough, resulting in min. model size w/ no threshold maps: ")
(call !EmptyAndTrain (assoc enable_auto_ablation .true max_num_cases 120))
Expand Down
47 changes: 20 additions & 27 deletions unit_tests/ut_h_scale_ablation.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@
))

(print "set_auto_ablation_params\n")
(call_entity "howso" "set_auto_ablation_params" (assoc
auto_ablation_enabled .true
min_num_cases 100
max_num_cases 200
))
(call_entity "howso" "set_auto_ablation_params" (assoc
auto_ablation_enabled .true
min_num_cases 100
max_num_cases 200
reduce_max_cases 100
))

(print "train\n")
(call_entity "howso" "train" (assoc
Expand All @@ -63,32 +64,20 @@
obs (first first_pass_cases)
exp 1
))
(print "only '998' and cases under '100' remain: ")
(print "only 100 cases remain: ")
(call assert_true (assoc
obs
(apply "and"
(map
(lambda (or (< (first (current_value)) 100) (= (first (current_value)) 998)))
obs (=
100
(size
(get first_pass_cases (list 1 "payload" "cases"))
)
)
))

;train some more cases with medium numbers.

(print "train\n")
(call_entity "howso" "train" (assoc
cases (call !CreateSquareCases (assoc xs (range 400 498)))
features (list "x" "y")
session "unit_test"
))

; set up training a specific number of medium-valued cases, anything near here was dropped in the first
; reduce_data call, but we've loaded in some duplicate cases.

;would be payload for cases in the 400-500 range
(declare (assoc
train_payload (call_entity "howso" "compute_train_payload" (assoc
cases (call !CreateSquareCases (assoc xs (range 450 455)))
cases (call !CreateSquareCases (assoc xs (range 400 500)))
features (list "x" "y")
session "unit_test"
))
Expand All @@ -100,12 +89,16 @@
))

; With that payload in hand, let's reduce_data again.

(call_entity "howso" "set_auto_ablation_params" (assoc
auto_ablation_enabled .true
min_num_cases 25
max_num_cases 100
reduce_max_cases 25
))
(print "reduce_data\n")
(call_entity "howso" "reduce_data" (assoc))

; This will again drop a lot of the medium-valued cases, so we're going to fail committing the payload.
(call_entity "howso" "reduce_data")

; This will drop a lot of the cases we need, so we're going to fail committing the payload.
(print "process_train_payload failed: ")
(call assert_same (assoc
obs (call_entity "howso" "process_train_payload" (get train_payload (list 1 "payload")))
Expand Down
2 changes: 1 addition & 1 deletion version.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"version": "0.0.0",
"dependencies": {
"amalgam": "71.0.4"
"amalgam": "71.0.6"
}
}
Loading