From 1d039276701e37921c5f899712a30c0785b9e118 Mon Sep 17 00:00:00 2001
From: ramalh
Date: Fri, 25 Aug 2023 17:23:43 +0300
Subject: [PATCH 1/3] add merge_direct_overlaps() method

---
 intervaltree/intervaltree.py | 53 ++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/intervaltree/intervaltree.py b/intervaltree/intervaltree.py
index 2548eed..d5b93f7 100644
--- a/intervaltree/intervaltree.py
+++ b/intervaltree/intervaltree.py
@@ -707,6 +707,59 @@ def new_series():
 
         self.__init__(merged)
 
+    def merge_direct_overlaps(self, data_reducer=None, strict=True):
+        """
+        Finds all pairs of intervals with overlapping ranges and merges
+        each pair separately into a single interval. If provided, uses
+        data_reducer with similar semantics to Python's built-in
+        reduce(reducer_func[, initializer]), as follows:
+
+        If data_reducer is set to a function, combines the data
+        fields of the Intervals with
+            current_reduced_data = data_reducer(current_reduced_data, new_data)
+        If data_reducer is None, the merged Interval's data
+        field will be set to None, ignoring all the data fields
+        of the merged Intervals.
+
+        If strict is True (default), intervals are only merged if
+        their ranges actually overlap; adjacent, touching intervals
+        will not be merged. If strict is False, intervals are merged
+        even if they are only end-to-end adjacent.
+
+        Sorting costs O(n*logn); the pairwise merging visits every
+        overlapping pair, which is O(n^2) when all intervals overlap.
+        """
+        if not self:
+            return
+
+        if data_reducer is None:
+            data_reducer = lambda *x: None
+
+        sorted_intervals = sorted(self.all_intervals)
+        merged = []
+        li = len(sorted_intervals)
+        highest = sorted_intervals[0].begin  # largest end of any interval merged so far
+
+        for i, inv in enumerate(sorted_intervals):
+            j = i + 1
+            while j < li and (inv.end > sorted_intervals[j].begin or
+                              not strict and inv.end == sorted_intervals[j].begin):
+                next_inv = sorted_intervals[j]
+                upper = max(inv.end, next_inv.end)
+                # None means "no data"; falsy values such as 0 or '' are still reduced
+                if inv.data is not None and next_inv.data is not None:
+                    data = data_reducer(inv.data, next_inv.data)
+                    merged.append(Interval(inv.begin, upper, data))
+                else:
+                    merged.append(Interval(inv.begin, upper))
+                if inv.end > highest:
+                    highest = inv.end
+                j += 1
+            if inv.begin >= highest:  # starts at/after every merged range so far: keep it
+                merged.append(inv)
+
+        self.__init__(merged)
+
     def merge_equals(self, data_reducer=None, data_initializer=None):
         """
         Finds all intervals with equal ranges and merges them

From 34a42807bbd6a4b14ce54d778dba544d472fa3c6 Mon Sep 17 00:00:00 2001
From: ramalh
Date: Fri, 25 Aug 2023 17:23:52 +0300
Subject: [PATCH 2/3] add info for merge_direct_overlaps() method

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index b7e24ae..83317c6 100644
--- a/README.md
+++ b/README.md
@@ -111,6 +111,7 @@ Features
     * `slice(point)` (slice intervals at `point`)
     * `split_overlaps()` (slice at all interval boundaries, optionally modifying the data field)
     * `merge_overlaps()` (joins overlapping intervals into a single interval, optionally merging the data fields)
+    * `merge_direct_overlaps()` (joins only directly-overlapping intervals into a single interval, optionally merging the data fields)
     * `merge_equals()` (joins intervals with matching ranges into a single interval, optionally merging the data fields)
     * `merge_neighbors()` (joins adjacent intervals into a single interval if the distance between their range terminals is less than or equal to a given distance. Optionally merges overlapping intervals. Can also merge the data fields.)
 

From dc9118b4338dad0dcc3ed535c36f64a2e7e7866e Mon Sep 17 00:00:00 2001
From: ramalh
Date: Fri, 25 Aug 2023 17:24:02 +0300
Subject: [PATCH 3/3] add 4 tests for merge_direct_overlaps() method

---
 test/intervaltree_methods/restructure_test.py | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/test/intervaltree_methods/restructure_test.py b/test/intervaltree_methods/restructure_test.py
index 9d178b3..f84ef68 100644
--- a/test/intervaltree_methods/restructure_test.py
+++ b/test/intervaltree_methods/restructure_test.py
@@ -152,6 +152,54 @@ def reducer(old, new):
         ])
     ]
 
+# -----------------------------------------------------------------------------
+# MERGE_DIRECT_OVERLAPS
+# -----------------------------------------------------------------------------
+def test_merge_direct_overlaps_empty():
+    t = IntervalTree()
+    t.merge_direct_overlaps()
+    t.verify()
+    assert len(t) == 0
+
+
+def test_merge_direct_overlaps_gapless():
+    # default strict=True
+    t = IntervalTree.from_tuples(data.ivs2.data)
+    t.merge_direct_overlaps()
+    t.verify()
+    assert [(iv.begin, iv.end, iv.data) for iv in sorted(t)] == data.ivs2.data
+
+    # strict=False
+    t = IntervalTree.from_tuples(data.ivs2.data)
+    rng = t.range()
+    t.merge_direct_overlaps(strict=False)
+    t.verify()
+    assert len(t) == len(data.ivs2.data)
+
+
+def test_merge_direct_overlaps_with_gap():
+    t = IntervalTree.from_tuples(data.ivs1.data)
+    t.merge_direct_overlaps()
+    t.verify()
+    assert len(t) > 2
+
+
+def test_merge_direct_overlaps_reducer_wo_initializer():
+    def reducer(old, new):
+        return "%s, %s" % (old, new)
+    # empty tree
+    e = IntervalTree()
+    e.merge_direct_overlaps(data_reducer=reducer)
+    e.verify()
+    assert not e
+
+    # one Interval in tree
+    o = IntervalTree.from_tuples([(1, 2, 'hello')])
+    o.merge_direct_overlaps(data_reducer=reducer)
+    o.verify()
+    assert len(o) == 1
+    assert sorted(o) == [Interval(1, 2, 'hello')]
+
 # -----------------------------------------------------------------------------
 # MERGE_EQUALS