From 79adeb9983ae784b73ef9dea10aa0c852fc39718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20D=C3=BCrr?= Date: Fri, 1 Mar 2024 15:38:35 +0100 Subject: [PATCH] added suffix_array --- CHANGELOG.md | 4 +++ setup.py | 2 +- tests/test_tryalgo.py | 7 +++++ tryalgo/suffix_array.py | 61 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 tryalgo/suffix_array.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 5691290..95718b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ # Changelog +## 1.7.0 + +- added suffix_array + ## 1.6.1 - corrected dyn_prog_Monge diff --git a/setup.py b/setup.py index 125fb56..d1d4fd5 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ setup( name='tryalgo', - version='1.6.1', + version='1.7.0', description=( 'Algorithms and data structures ' 'for preparing programming competitions' diff --git a/tests/test_tryalgo.py b/tests/test_tryalgo.py index 816379f..e4baa4f 100755 --- a/tests/test_tryalgo.py +++ b/tests/test_tryalgo.py @@ -102,6 +102,7 @@ def isclose(a, b, rel_tol, abs_tol): from tryalgo.subsetsum_divide import subset_sum2 as subset_sum2 from tryalgo.subsetsum import subset_sum as subset_sum3, coin_change from tryalgo.sudoku import sudoku +from tryalgo.suffix_array import sort_class, sort_cyclic_shifts, suffix_array from tryalgo.three_partition import three_partition from tryalgo.topological_order import topological_order_dfs, topological_order from tryalgo.tortoise_hare import tortoise_hare @@ -1853,6 +1854,12 @@ def test_sudoku(self): self.assertEqual(sorted(G[n * i + di][n * j + dj] for di in range(n) for dj in range(n)), all_terms) + def test_suffix_array(self): + L = [11, 20, 16, 21, 12, 17, 14, 25, 10, 15, 22, 7, 0, 3, 18, 5, 13, 23, 8, 1, 4, 19, 6, 24, 9, 2] + self.assertEqual(suffix_array("abracadabra0AbRa4Cad14abra"), L) + self.assertEqual(sort_class("aaba"), ([0, 1, 3, 2], [0, 0, 1, 0])) + self.assertEqual(sort_cyclic_shifts("abaab"), [2, 0, 3, 1, 4]) + 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# NOTE: this module was recovered from a whitespace-collapsed git patch.
# The patch residue that shared the same source line is kept verbatim
# below so that nothing from the original diff is lost:
#   tests/test_tryalgo.py, trailing hunk context:
#       def test_ternary_search(self):
#           x = ternary_search(lambda x: -x * (x - 4), 0, 4)
#           self.assertTrue(1.9 <= x <= 2.1)
#   diff --git a/tryalgo/suffix_array.py b/tryalgo/suffix_array.py
#   new file mode 100644
#   index 0000000..428b533
#   --- /dev/null
#   +++ b/tryalgo/suffix_array.py
#   @@ -0,0 +1,61 @@
"""\
suffix array,
but only the O(n log^2(n)) implementation,
which is enough for most programming contest problems

christoph dürr 2024
"""


def sort_class(s):
    """ sorts s and returns additional information

    :param s: string or list (any iterable of hashable, mutually
              comparable items)
    :returns p, c: p[j]=i if s[i] has rank j in sorted(s), equal items
                   being ordered by increasing index,
                   and c[i] is the rank of s[i] in sorted(set(s))
    :complexity: O(n log n)
    """
    # Materialize once so that one-shot iterables work as well.
    items = list(s)
    # sorted() is stable, hence equal items keep their index order; this
    # gives the same permutation as sorting explicit (item, index) pairs.
    p = sorted(range(len(items)), key=items.__getitem__)
    rank_of = {x: r for r, x in enumerate(sorted(set(items)))}
    c = [rank_of[x] for x in items]
    return p, c


def sort_cyclic_shifts(s):
    """ given a string s, sort lexicographically all cyclic shifts of s.

    The i-th cyclic shift of s is s[i:] + s[:i].
    Identical cyclic shifts are ordered by increasing index.

    :param s: string or list
    :returns L: such that L[j]=i if the i-th cyclic shift of s has rank j
    :complexity: O(n * log(n)^2)
    """
    p, c = sort_class(s)
    n = len(c)
    K = 1
    # Invariant: c ranks the cyclic shifts by their first K items.
    # One round doubles that length, so rounds are only needed while
    # K < n; a round with K >= n cannot change the order anymore.
    while K < n:
        pairs = [(c[i], c[(i + K) % n]) for i in range(n)]
        p, c = sort_class(pairs)
        K <<= 1
    return p


def suffix_array(s):
    """ given a string s, sort lexicographically suffixes of s

    The items of s are first replaced by their ranks (shifted by one),
    so that 0 can serve as an end marker which is strictly smaller than
    every item.  The suffixes of s are then in the same order as the
    cyclic shifts of s + marker, the shift starting at the marker coming
    first.  No character is reserved: s may contain chr(0), may be empty
    and may also be a list.

    :param s: string, or list of hashable, mutually comparable items
    :returns: R with R[i] is j such that s[j:] has rank i
    :complexity: O(n log^2 n)
    """
    _, ranks = sort_class(s)
    marked = [r + 1 for r in ranks]
    marked.append(0)
    # Position 0 of the result is the shift starting at the end marker.
    return sort_cyclic_shifts(marked)[1:]


if __name__ == "__main__":
    # tested at https://www.spoj.com/problems/SARRAY/
    import sys

    text = sys.stdin.readline().rstrip()
    # Single write instead of one print call per suffix.
    sys.stdout.write("".join("%d\n" % pos for pos in suffix_array(text)))