@@ -4,14 +4,51 @@
 
 import numpy as np
 from numpy import ma
+
+try:
+    import bottleneck as bn
+except ImportError:
+    HAS_BOTTLENECK = False
+else:
+    HAS_BOTTLENECK = True
+
 from .core import sigma_func
 
 from astropy.nddata import CCDData, StdDevUncertainty
+from astropy.stats import sigma_clip
 from astropy import log
 
 __all__ = ['Combiner', 'combine']
 
 
+def _default_median():  # pragma: no cover
+    if HAS_BOTTLENECK:
+        return bn.nanmedian
+    else:
+        return np.nanmedian
+
+
+def _default_average():  # pragma: no cover
+    if HAS_BOTTLENECK:
+        return bn.nanmean
+    else:
+        return np.nanmean
+
+
+def _default_sum():  # pragma: no cover
+    if HAS_BOTTLENECK:
+        return bn.nansum
+    else:
+        return np.nansum
+
+
+def _default_std():  # pragma: no cover
+    if HAS_BOTTLENECK:
+        return bn.nanstd
+    else:
+        return np.nanstd
+
+
 class Combiner:
     """
     A class for combining CCDData objects.
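The `_default_*` helpers above pick the fastest available implementation at call time. A minimal sketch of how this dispatch behaves (`demo_median` is a hypothetical name used only for illustration; it mirrors `_default_median()`):

```python
import numpy as np

try:
    import bottleneck as bn
except ImportError:
    HAS_BOTTLENECK = False
else:
    HAS_BOTTLENECK = True

def demo_median():
    # Same pattern as _default_median(): prefer bottleneck's compiled
    # implementation, fall back to numpy's nan-aware function.
    return bn.nanmedian if HAS_BOTTLENECK else np.nanmedian

arr = np.array([[1.0, np.nan], [3.0, 4.0]])
print(demo_median()(arr, axis=0))  # NaNs are ignored: [2. 4.]
```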
@@ -258,7 +295,8 @@ def minmax_clipping(self, min_clip=None, max_clip=None):
 
     # set up sigma clipping algorithms
     def sigma_clipping(self, low_thresh=3, high_thresh=3,
-                       func=ma.mean, dev_func=ma.std):
+                       func=ma.mean, dev_func=ma.std, use_astropy=False,
+                       **kwd):
         """
         Pixels will be rejected if they have deviations greater than those
         set by the threshold values. The algorithm will first calculate
@@ -283,15 +321,38 @@ def sigma_clipping(self, low_thresh=3, high_thresh=3,
         func : function, optional
             Function for calculating the baseline values (i.e. `numpy.ma.mean`
             or `numpy.ma.median`). This should be a function that can handle
-            `numpy.ma.MaskedArray` objects.
+            `numpy.ma.MaskedArray` objects. **Set this to ``'median'`` with
+            ``use_astropy=True`` for the best performance when clipping
+            around the median.**
             Default is `numpy.ma.mean`.
 
         dev_func : function, optional
             Function for calculating the deviation from the baseline value
             (i.e. `numpy.ma.std`). This should be a function that can handle
             `numpy.ma.MaskedArray` objects.
             Default is `numpy.ma.std`.
+
+        use_astropy : bool, optional
+            If ``True``, use astropy's `~astropy.stats.sigma_clip`, which is
+            faster and more flexible. The upper and lower clipping limits are
+            set from ``low_thresh`` and ``high_thresh``, and any remaining
+            keywords are passed on to `~astropy.stats.sigma_clip`. By default,
+            the number of iterations and other settings are chosen to
+            reproduce the behavior of ccdproc's ``sigma_clipping``.
         """
+        if use_astropy:
+            # Pop these keywords rather than reading them so they are not
+            # passed to sigma_clip a second time through **kwd.
+            copy = kwd.pop('copy', False)
+            axis = kwd.pop('axis', 0)
+            maxiters = kwd.pop('maxiters', 1)
+            self.data_arr.mask = \
+                sigma_clip(self.data_arr.data, sigma_lower=low_thresh,
+                           sigma_upper=high_thresh, axis=axis, copy=copy,
+                           maxiters=maxiters,
+                           cenfunc=func, stdfunc=dev_func,
+                           masked=True,
+                           **kwd).mask
+            return
+
         # setup baseline values
         baseline = func(self.data_arr, axis=0)
         dev = dev_func(self.data_arr, axis=0)
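For illustration, a sketch of how the new ``use_astropy`` path might be exercised. The inputs are hypothetical; a single clipping iteration along the image stack matches the defaults set above:

```python
import numpy as np
from astropy.nddata import CCDData
from ccdproc import Combiner

# Hypothetical stack of ten flat frames with one hot pixel in the first
images = [CCDData(np.ones((10, 10)), unit='adu') for _ in range(10)]
images[0].data[5, 5] = 1000.0

combiner = Combiner(images)
# Clip along axis 0 (the image stack) using astropy's sigma_clip;
# maxiters defaults to 1 here, mimicking the old ccdproc behavior.
combiner.sigma_clipping(low_thresh=3, high_thresh=3,
                        func='median', dev_func='std', use_astropy=True)
print(combiner.data_arr.mask[:, 5, 5])  # hot pixel should be masked in image 0
```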
@@ -313,8 +374,38 @@ def _get_scaled_data(self, scale_arg):
             return self.data_arr * self.scaling
         return self.data_arr
 
+    def _get_nan_substituted_data(self, data):
+        # Get the data as an unmasked array with masked values filled as NaN
+        if self.data_arr.mask.any():
+            data = np.ma.filled(data, fill_value=np.nan)
+        else:
+            data = data.data
+        return data
+
+    def _combination_setup(self, user_func, default_func, scale_to):
+        """
+        Handle the common pieces of image combination data/mask setup.
+        """
+        data = self._get_scaled_data(scale_to)
+
+        # Play it safe for now and only substitute NaNs for masked values
+        # when the user is using the default combination function.
+        if user_func is None:
+            combo_func = default_func
+            # Substitute NaN for masked entries
+            data = self._get_nan_substituted_data(data)
+            masked_values = np.isnan(data).sum(axis=0)
+        else:
+            masked_values = self.data_arr.mask.sum(axis=0)
+            combo_func = user_func
+
+        return data, masked_values, combo_func
+
     # set up the combining algorithms
-    def median_combine(self, median_func=ma.median, scale_to=None,
+    def median_combine(self, median_func=None, scale_to=None,
                        uncertainty_func=sigma_func):
         """
         Median combine a set of arrays.
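In effect, ``_combination_setup`` only switches to the NaN-substituted, unmasked representation when the caller leaves the combination function at its default. A small sketch of the equivalence that path relies on, using illustrative values:

```python
import numpy as np

# A 3-image stack with one masked pixel in the first image
stack = np.ma.masked_array(
    np.array([[9.0], [2.0], [4.0]]),
    mask=np.array([[True], [False], [False]]))

# What _get_nan_substituted_data produces: masked entries become NaN
filled = np.ma.filled(stack, fill_value=np.nan)

# The nan-aware reduction then matches the masked-array reduction,
# which is what lets the default path use the faster nan* functions.
print(np.ma.median(stack, axis=0))   # [3.0]
print(np.nanmedian(filled, axis=0))  # [3.]
```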
@@ -351,15 +442,28 @@ def median_combine(self, median_func=ma.median, scale_to=None,
         The uncertainty currently calculated using the median absolute
         deviation does not account for rejected pixels.
         """
-        # set the data
-        data = median_func(self._get_scaled_data(scale_to), axis=0)
+
+        data, masked_values, median_func = \
+            self._combination_setup(median_func,
+                                    _default_median(),
+                                    scale_to)
+
+        medianed = median_func(data, axis=0)
 
         # set the mask
-        masked_values = self.data_arr.mask.sum(axis=0)
         mask = (masked_values == len(self.data_arr))
 
         # set the uncertainty
-        uncertainty = uncertainty_func(self.data_arr, axis=0)
+
+        # This still uses numpy for the median because the astropy
+        # code requires that the median function accept the argument
+        # overwrite_input, which bottleneck does not allow.
+        # This is ugly, but setting ignore_nan to True should make sure
+        # that either NaNs or masks are handled properly.
+        if uncertainty_func is sigma_func:
+            uncertainty = uncertainty_func(data, axis=0, ignore_nan=True)
+        else:
+            uncertainty = uncertainty_func(data, axis=0)
         # Divide uncertainty by the number of pixels (#309)
         uncertainty /= np.sqrt(len(self.data_arr) - masked_values)
         # Convert uncertainty to plain numpy array (#351)
@@ -370,7 +474,7 @@ def median_combine(self, median_func=ma.median, scale_to=None,
         uncertainty = np.asarray(uncertainty)
 
         # create the combined image with a dtype matching the combiner
-        combined_image = CCDData(np.asarray(data.data, dtype=self.dtype),
+        combined_image = CCDData(np.asarray(medianed, dtype=self.dtype),
                                  mask=mask, unit=self.unit,
                                  uncertainty=StdDevUncertainty(uncertainty))
 
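Taken together, these changes keep ``median_combine``'s public behavior the same while routing the default path through the nan-aware functions. A usage sketch with hypothetical data, not taken from the PR:

```python
import numpy as np
from astropy.nddata import CCDData
from ccdproc import Combiner

images = [CCDData(np.full((5, 5), v), unit='adu') for v in (1.0, 2.0, 9.0)]
combiner = Combiner(images)

# median_func=None now selects bottleneck.nanmedian when available,
# falling back to numpy.nanmedian otherwise.
combined = combiner.median_combine()
print(combined.data[0, 0])               # 2.0, the per-pixel median
print(combined.uncertainty.array[0, 0])  # sigma_func-based estimate
```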
@@ -380,8 +484,26 @@ def median_combine(self, median_func=ma.median, scale_to=None,
         # return the combined image
         return combined_image
 
-    def average_combine(self, scale_func=ma.average, scale_to=None,
-                        uncertainty_func=ma.std):
+    def _weighted_sum(self, data, sum_func):
+        """
+        Perform a weighted sum, used by ``sum_combine`` and, when weights
+        are set, by ``average_combine``.
+        """
+        if self.weights.shape != data.shape:
+            # Add extra axes to the weights for broadcasting
+            weights = np.reshape(self.weights, [len(self.weights), 1, 1])
+        else:
+            weights = self.weights
+
+        # bn.nansum is not precise enough for float32 sums, so cast the
+        # weights to float64 to ensure the sums are carried out as float64
+        weights = weights.astype('float64')
+        weighted_sum = sum_func(data * weights, axis=0)
+        return weighted_sum, weights
+
+    def average_combine(self, scale_func=None, scale_to=None,
+                        uncertainty_func=_default_std(), sum_func=_default_sum()):
         """
         Average combine together a set of arrays.
@@ -397,7 +519,7 @@ def average_combine(self, scale_func=ma.average, scale_to=None,
         ----------
         scale_func : function, optional
             Function to calculate the average. Defaults to
-            `numpy.ma.average`.
+            `numpy.nanmean` or `bottleneck.nanmean`.
 
         scale_to : float or None, optional
             Scaling factor used in the average combined image. If given,
@@ -406,40 +528,57 @@ def average_combine(self, scale_func=ma.average, scale_to=None,
         uncertainty_func : function, optional
             Function to calculate uncertainty. Defaults to `numpy.nanstd`
             or `bottleneck.nanstd`.
 
+        sum_func : function, optional
+            Function used to calculate sums, including the sum done to
+            find the weighted average. Defaults to `numpy.nansum` or
+            `bottleneck.nansum`.
+
         Returns
         -------
         combined_image: `~astropy.nddata.CCDData`
             CCDData object based on the combined input of CCDData objects.
         """
-        # set up the data
-        data, wei = scale_func(self._get_scaled_data(scale_to),
-                               axis=0, weights=self.weights,
-                               returned=True)
+        data, masked_values, scale_func = \
+            self._combination_setup(scale_func,
+                                    _default_average(),
+                                    scale_to)
+
+        # Do NOT modify data after this -- we need it to be intact when
+        # we get to the uncertainty calculation.
+        if self.weights is not None:
+            weighted_sum, weights = self._weighted_sum(data, sum_func)
+            mean = weighted_sum / sum_func(weights, axis=0)
+        else:
+            mean = scale_func(data, axis=0)
+
+        # calculate the mask
 
-        # set up the mask
-        masked_values = self.data_arr.mask.sum(axis=0)
         mask = (masked_values == len(self.data_arr))
 
         # set up the deviation
-        uncertainty = uncertainty_func(self.data_arr, axis=0)
+        uncertainty = uncertainty_func(data, axis=0)
         # Divide uncertainty by the number of pixels (#309)
-        uncertainty /= np.sqrt(len(self.data_arr) - masked_values)
+        uncertainty /= np.sqrt(len(data) - masked_values)
         # Convert uncertainty to plain numpy array (#351)
         uncertainty = np.asarray(uncertainty)
 
         # create the combined image with a dtype that matches the combiner
-        combined_image = CCDData(np.asarray(data.data, dtype=self.dtype),
+        combined_image = CCDData(np.asarray(mean, dtype=self.dtype),
                                  mask=mask, unit=self.unit,
                                  uncertainty=StdDevUncertainty(uncertainty))
 
         # update the meta data
-        combined_image.meta['NCOMBINE'] = len(self.data_arr)
+        combined_image.meta['NCOMBINE'] = len(data)
 
         # return the combined image
         return combined_image
 
-    def sum_combine(self, sum_func=ma.sum, scale_to=None,
-                    uncertainty_func=ma.std):
+    def sum_combine(self, sum_func=None, scale_to=None,
+                    uncertainty_func=_default_std()):
         """
         Sum combine together a set of arrays.
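A short sketch of the new weighted path in ``average_combine``, with hypothetical inputs, assuming one weight per image is accepted (the reshape branch in ``_weighted_sum`` above handles exactly this case). The weighted mean is the weighted sum divided by the summed weights, as in the code:

```python
import numpy as np
from astropy.nddata import CCDData
from ccdproc import Combiner

images = [CCDData(np.full((4, 4), v), unit='adu') for v in (1.0, 2.0, 3.0)]
combiner = Combiner(images)
combiner.weights = np.array([3.0, 1.0, 1.0])  # one weight per image

combined = combiner.average_combine()
# (3*1 + 1*2 + 1*3) / (3 + 1 + 1) = 1.6
print(combined.data[0, 0])
```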
@@ -458,7 +597,7 @@ def sum_combine(self, sum_func=ma.sum, scale_to=None,
         ----------
         sum_func : function, optional
             Function to calculate the sum. Defaults to
-            `numpy.ma.sum`.
+            `numpy.nansum` or `bottleneck.nansum`.
 
         scale_to : float or None, optional
             Scaling factor used in the sum combined image. If given,
@@ -472,24 +611,31 @@ def sum_combine(self, sum_func=ma.sum, scale_to=None,
         combined_image: `~astropy.nddata.CCDData`
             CCDData object based on the combined input of CCDData objects.
         """
-        # set up the data
-        data = sum_func(self._get_scaled_data(scale_to), axis=0)
+
+        data, masked_values, sum_func = \
+            self._combination_setup(sum_func,
+                                    _default_sum(),
+                                    scale_to)
+
+        if self.weights is not None:
+            summed, weights = self._weighted_sum(data, sum_func)
+        else:
+            summed = sum_func(data, axis=0)
 
         # set up the mask
-        masked_values = self.data_arr.mask.sum(axis=0)
         mask = (masked_values == len(self.data_arr))
 
         # set up the deviation
-        uncertainty = uncertainty_func(self.data_arr, axis=0)
+        uncertainty = uncertainty_func(data, axis=0)
         # Divide uncertainty by the number of pixels (#309)
-        uncertainty /= np.sqrt(len(self.data_arr) - masked_values)
+        uncertainty /= np.sqrt(len(data) - masked_values)
         # Convert uncertainty to plain numpy array (#351)
         uncertainty = np.asarray(uncertainty)
         # Multiply uncertainty by square root of the number of images
-        uncertainty *= len(self.data_arr) - masked_values
+        uncertainty *= len(data) - masked_values
 
         # create the combined image with a dtype that matches the combiner
-        combined_image = CCDData(np.asarray(data.data, dtype=self.dtype),
+        combined_image = CCDData(np.asarray(summed, dtype=self.dtype),
                                  mask=mask, unit=self.unit,
                                  uncertainty=StdDevUncertainty(uncertainty))
 
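The two uncertainty adjustments in ``sum_combine`` (divide by sqrt(n), then multiply by n) amount to scaling the per-pixel standard deviation by sqrt(n), which is the standard error propagation for a sum of n images. A worked check with illustrative numbers:

```python
import numpy as np

# For a sum of n images, error propagation gives
#   sigma_sum = sigma_single * sqrt(n)
# The code reaches this in two steps: divide by sqrt(n), multiply by n.
n = 4
sigma_single = 5.0
step1 = sigma_single / np.sqrt(n)  # 2.5  (mean-style uncertainty)
step2 = step1 * n                  # 10.0
print(step2 == sigma_single * np.sqrt(n))  # True
```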
@@ -730,7 +876,7 @@ def combine(img_list, output_file=None,
     else:
         memory_factor = 2
 
-    memory_factor *= 1.5
+    memory_factor *= 1.3
 
     # determine the number of chunks to split the images into
     no_chunks = int((memory_factor * size_of_an_img * no_of_img) / mem_limit) + 1
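With the reduced safety factor, the chunk count works out as below. The sizes are hypothetical, chosen only to show the arithmetic:

```python
# Worked example of the chunking arithmetic with the new 1.3 factor
memory_factor = 2        # e.g. the non-sum branch above
memory_factor *= 1.3     # was 1.5 before this change

size_of_an_img = 64e6    # bytes, e.g. a 4k x 4k float32 frame
no_of_img = 10
mem_limit = 350e6        # bytes

no_chunks = int((memory_factor * size_of_an_img * no_of_img) / mem_limit) + 1
print(no_chunks)  # 2.6 * 64e6 * 10 / 350e6 = 4.75... -> 4 + 1 = 5 chunks
```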