-
Notifications
You must be signed in to change notification settings - Fork 35
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add window-wise RANSAC to reduce RAM requirements #66
Changes from 8 commits
9f5c784
29cb6bf
83b38d6
354f635
b13b874
629950b
94d157f
56e7677
7cbe3bf
96b2965
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -144,22 +144,42 @@ def get_bads(self, verbose=False): | |||||||||||||||||||
) | ||||||||||||||||||||
return bads | ||||||||||||||||||||
|
||||||||||||||||||||
def find_all_bads(self, ransac=True): | ||||||||||||||||||||
def find_all_bads(self, ransac=True, channel_wise=False, max_chunk_size=None): | ||||||||||||||||||||
"""Call all the functions to detect bad channels. | ||||||||||||||||||||
|
||||||||||||||||||||
This function calls all the bad-channel detecting functions. | ||||||||||||||||||||
|
||||||||||||||||||||
Parameters | ||||||||||||||||||||
---------- | ||||||||||||||||||||
ransac : bool | ||||||||||||||||||||
To detect channels by ransac or not. | ||||||||||||||||||||
ransac : bool, optional | ||||||||||||||||||||
Whether RANSAC should be used for bad channel detection, in addition to | ||||||||||||||||||||
the other methods. RANSAC can detect bad channels that other methods | ||||||||||||||||||||
are unable to catch, but also slows down noisy channel detection | ||||||||||||||||||||
considerably. Defaults to ``True``. | ||||||||||||||||||||
channel_wise : bool, optional | ||||||||||||||||||||
Whether RANSAC should predict signals for whole chunks of channels | ||||||||||||||||||||
at once instead of predicting signals for each RANSAC window | ||||||||||||||||||||
individually. Channel-wise RANSAC generally has higher RAM demands | ||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice and clear, I like it! |
||||||||||||||||||||
than window-wise RANSAC (especially if `max_chunk_size` is | ||||||||||||||||||||
``None``), but can be faster on systems with lots of RAM to spare. | ||||||||||||||||||||
Has no effect if not using RANSAC. Defaults to ``False``. | ||||||||||||||||||||
max_chunk_size : {int, None}, optional | ||||||||||||||||||||
The maximum number of channels to predict at once during | ||||||||||||||||||||
channel-wise RANSAC. If ``None``, RANSAC will use the largest chunk | ||||||||||||||||||||
size that will fit into the available RAM, which may slow down | ||||||||||||||||||||
other programs on the host system. If using window-wise RANSAC | ||||||||||||||||||||
(the default) or not using RANSAC at all, this parameter has no | ||||||||||||||||||||
effect. Defaults to ``None``. | ||||||||||||||||||||
|
||||||||||||||||||||
""" | ||||||||||||||||||||
self.find_bad_by_nan_flat() | ||||||||||||||||||||
self.find_bad_by_deviation() | ||||||||||||||||||||
self.find_bad_by_SNR() | ||||||||||||||||||||
if ransac: | ||||||||||||||||||||
self.find_bad_by_ransac() | ||||||||||||||||||||
self.find_bad_by_ransac( | ||||||||||||||||||||
channel_wise=channel_wise, | ||||||||||||||||||||
max_chunk_size=max_chunk_size | ||||||||||||||||||||
) | ||||||||||||||||||||
|
||||||||||||||||||||
def find_bad_by_nan_flat(self): | ||||||||||||||||||||
"""Detect channels that appear flat or have NaN values.""" | ||||||||||||||||||||
|
@@ -409,6 +429,7 @@ def find_bad_by_ransac( | |||||||||||||||||||
fraction_bad=0.4, | ||||||||||||||||||||
corr_window_secs=5.0, | ||||||||||||||||||||
channel_wise=False, | ||||||||||||||||||||
max_chunk_size=None, | ||||||||||||||||||||
): | ||||||||||||||||||||
"""Detect channels that are not predicted well by other channels. | ||||||||||||||||||||
|
||||||||||||||||||||
|
@@ -447,10 +468,18 @@ def find_bad_by_ransac( | |||||||||||||||||||
The duration (in seconds) of each RANSAC correlation window. Defaults | ||||||||||||||||||||
to 5 seconds. | ||||||||||||||||||||
channel_wise : bool, optional | ||||||||||||||||||||
Whether RANSAC should be performed one channel at a time (lower RAM | ||||||||||||||||||||
demands) or in chunks of as many channels as can fit into the | ||||||||||||||||||||
currently available RAM (faster). Defaults to ``False`` (i.e., using | ||||||||||||||||||||
the faster method). | ||||||||||||||||||||
Whether RANSAC should predict signals for whole chunks of channels | ||||||||||||||||||||
at once instead of predicting signals for each RANSAC window | ||||||||||||||||||||
individually. Channel-wise RANSAC generally has higher RAM demands | ||||||||||||||||||||
than window-wise RANSAC (especially if `max_chunk_size` is | ||||||||||||||||||||
``None``), but can be faster on systems with lots of RAM to spare. | ||||||||||||||||||||
Defaults to ``False``. | ||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay I think I am a little confused by this. We can run RANSAC in the following ways:
There would be a fourth potential option -> running single channels over windows ... so that'd be the slowest possible, but the lowest RAM ever (can probably even run on a raspberry pi 🤣 ). Overall I am not sure how many of these options we should offer and how to document them. Right now, I think one option should be fine: the matlab/autoreject way of doing all channels at once but over windows ... and if too little RAM is there, we just need to make the windows smaller ... right? (or would that affect the accuracy at some point?) what are your opinions @a-hurst @yjmantilla There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd thought about that too (chunking-by-channel in window-wise RANSAC), but wasn't sure how to best merge the chunking code with the windowing stuff without a massive rewrite. Right now, window-wise still takes less RAM than one-at-a-time channel-wise unless (channel count x 5 seconds) is larger than the length of the whole recording (i.e., one-channel-at-a-time only takes less RAM if a 32-channel recording is under 2.6 minutes, a 64-channel is under 5.3 mins, a 128-channel is under 10.6 mins, etc.). Thus, in most cases window-wise should be good enough on the RAM front. Still, I wouldn't be opposed to getting that down further: with enough optimization, I could probably get PyPREP running (slowly) without swapping on my old iMac G4! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think window-wise is better. I remember talking bout this in another thread. In the usual use-case window-wise will usually consume less ram than the approach i took. 
I think it would be nice to offer low-ram version even though it is the slowest, maybe fitting the largest chunk of channels could be done automatically like it is done currently in the channel wise version. but is not a priority. All the work @a-hurst is doing is quite amazing, priority should be that matlab prep comparison. So +1 making the window-wise the default There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks @yjmantilla! As for the MATLAB PREP comparison, I just got the MATLAB side of that working today (automated MatPREP on GitHub actions, saving relevant data as artifacts for later testing) so expect some preliminary stuff on that end shortly :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please adjust this docstr according to https://github.com/sappelhoff/pyprep/pull/66/files#r624438768 for consistency :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. lol that commit was after I'd spent a good 30-40 minutes trying to figure out why Turns out, if you use any keyword arguments in a MATLAB function call (e.g. |
||||||||||||||||||||
max_chunk_size : {int, None}, optional | ||||||||||||||||||||
The maximum number of channels to predict at once during | ||||||||||||||||||||
channel-wise RANSAC. If ``None``, RANSAC will use the largest chunk | ||||||||||||||||||||
size that will fit into the available RAM, which may slow down | ||||||||||||||||||||
other programs on the host system. If using window-wise RANSAC | ||||||||||||||||||||
(the default), this parameter has no effect. Defaults to ``None``. | ||||||||||||||||||||
|
||||||||||||||||||||
References | ||||||||||||||||||||
---------- | ||||||||||||||||||||
|
@@ -479,6 +508,7 @@ def find_bad_by_ransac( | |||||||||||||||||||
fraction_bad, | ||||||||||||||||||||
corr_window_secs, | ||||||||||||||||||||
channel_wise, | ||||||||||||||||||||
max_chunk_size, | ||||||||||||||||||||
self.random_state, | ||||||||||||||||||||
self.matlab_strict, | ||||||||||||||||||||
) | ||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,19 @@ class PrepPipeline: | |
ransac : bool, optional | ||
Whether or not to use RANSAC for noisy channel detection in addition to | ||
the other methods in :class:`~pyprep.NoisyChannels`. Defaults to True. | ||
channel_wise : bool, optional | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
Whether RANSAC should predict signals for whole chunks of channels at | ||
once instead of predicting signals for each RANSAC window | ||
individually. Channel-wise RANSAC generally has higher RAM demands than | ||
window-wise RANSAC (especially if `max_chunk_size` is ``None``), but can | ||
be faster on systems with lots of RAM to spare. Has no effect if not | ||
using RANSAC. Defaults to ``False``. | ||
max_chunk_size : {int, None}, optional | ||
The maximum number of channels to predict at once during channel-wise | ||
RANSAC. If ``None``, RANSAC will use the largest chunk size that will | ||
fit into the available RAM, which may slow down other programs on the | ||
host system. If using window-wise RANSAC (the default) or not using | ||
RANSAC at all, this parameter has no effect. Defaults to ``None``. | ||
random_state : {int, None, np.random.RandomState}, optional | ||
The random seed at which to initialize the class. If random_state is | ||
an int, it will be used as a seed for RandomState. | ||
|
@@ -99,6 +112,8 @@ def __init__( | |
prep_params, | ||
montage, | ||
ransac=True, | ||
channel_wise=False, | ||
max_chunk_size=None, | ||
random_state=None, | ||
filter_kwargs=None, | ||
matlab_strict=False, | ||
|
@@ -133,7 +148,11 @@ def __init__( | |
if self.prep_params["reref_chs"] == "eeg": | ||
self.prep_params["reref_chs"] = self.ch_names_eeg | ||
self.sfreq = self.raw_eeg.info["sfreq"] | ||
self.ransac = ransac | ||
self.ransac_settings = { | ||
'ransac': ransac, | ||
'channel_wise': channel_wise, | ||
'max_chunk_size': max_chunk_size | ||
} | ||
self.random_state = check_random_state(random_state) | ||
self.filter_kwargs = filter_kwargs | ||
self.matlab_strict = matlab_strict | ||
|
@@ -189,9 +208,9 @@ def fit(self): | |
reference = Reference( | ||
self.raw_eeg, | ||
self.prep_params, | ||
ransac=self.ransac, | ||
random_state=self.random_state, | ||
matlab_strict=self.matlab_strict | ||
matlab_strict=self.matlab_strict, | ||
**self.ransac_settings | ||
) | ||
reference.perform_reference() | ||
self.raw_eeg = reference.raw | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.