-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsubstring_with_concatention_of_all_words.py
81 lines (75 loc) · 4.4 KB
/
substring_with_concatention_of_all_words.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Jacobus Burger (2023)
# see: https://leetcode.com/problems/substring-with-concatenation-of-all-words/description/
# My first solution to a hard string problem. This is the most straight-forward approach, and it passes the time limit, but it has a terrible time complexity of roughly O(n**2).
class Solution:
def findSubstring(self, s: str, words: List[str]) -> List[int]:
# preconditions to end early on
if any(word not in s for word in words): # there is a word we can't match somewhere in s
return []
# DP solution, iteratively match word slices (substrings)
# against the collection of yet unmatched words, if they
# all match (no unmatched words left) then we've found
# a substring with concatenation of all words, if any
# slice doesn't match the remaining words, then we've reached
# an invalid substring and can quit, moving to the next one.
# To accomplish this, we use a left and right pointer in an
# outer and innter loop, the left pointer represents the left
# end of our sliding window, the right pointer represents the
# current substring partition we're matching against our
# list of unmatched words. We move r with l when we move l, but
# for every l we move r on its own to check substrings ahead.
# The time complexity is laughably bad. This should be
# approximately O(n * n/m) where m is words[i].length
indices = [] # the starts of each substring w/ concat all words
word_length = len(words[0]) # len of word (size of substrs)
unmatched = words.copy() # the list of words yet unmatched
l = r = 0 # left and right pointers of window
while l < len(s):
while r < len(s):
word = s[r : r + word_length]
if word in unmatched:
unmatched.remove(word)
else:
break
if unmatched == []:
indices.append(l)
break
r += word_length
l += 1
r = l
unmatched = words.copy()
return indices
# This is my better solution using dictionaries and other optimizations.
from collections import Counter
class Solution:
def findSubstring(self, s: str, words: List[str]) -> List[int]:
# This time around is better, I use two counters (dictionaries of occurences) to keep a memo
# of how many times each word has occured in the current substring we're scanning (the inner loop,
# the window from left pointer to right pointer) and then I scan across that growing window until
# I encounter either a word that is not in words, or more words than there is a count of, and quit
# early, unless the occurences of substrings in match is the same as in count, then the left index
# is recorded and we continue.
# One optimization that can be carried to the previous solution is realizing that we can quit early once
# the left pointer has reached the last spot it can that would still allow the length of a full
# concatenated substring of words to fit into the "frame" of the string s, and that we only need to
# iterate the right pointer for the length of the full concatenated substirng of words as well, which
# reduces the amount of work that needs to be done.
# Another improvement here that can be carried elsewhere is the utilization of a memo with a hashtable (a dictionary)
# instead of relying on constant copying and removing from lists (which add to time complexity).
word_length = len(words[0]) # length of each word
combo_length = len(words) * word_length # length of any concat substr
count = Counter(words) # memo of occurences of each word in words
indices = [] # start of complete concat substr
for left in range(len(s) - combo_length + 1):
match = Counter()
for right in range(left, left + combo_length, word_length):
word = s[right : right + word_length]
if word not in count:
break
match[word] += 1
if match[word] > count[word]:
break
if match == count:
indices.append(left)
break
return indices