Skip to content

Commit

Permalink
reduce the scope of the PR
Browse files Browse the repository at this point in the history
Signed-off-by: martinvuyk <[email protected]>
  • Loading branch information
martinvuyk committed Dec 19, 2024
1 parent 6b35221 commit ba2c415
Show file tree
Hide file tree
Showing 7 changed files with 137 additions and 125 deletions.
18 changes: 9 additions & 9 deletions stdlib/src/builtin/string_literal.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ struct StringLiteral(

@always_inline
fn as_bytes(self) -> Span[Byte, StaticConstantOrigin]:
"""Returns a contiguous Span of the bytes owned by this string.
"""Returns a contiguous slice of the bytes owned by this string.
Returns:
A contiguous slice pointing to the bytes owned by this string.
Expand Down Expand Up @@ -691,7 +691,7 @@ struct StringLiteral(
return result

@always_inline
fn split(self, sep: StringSlice, maxsplit: Int) -> List[String]:
fn split(self, sep: StringSlice, maxsplit: Int) -> List[StaticString]:
"""Split the string by a separator.
Args:
Expand All @@ -712,10 +712,10 @@ struct StringLiteral(
```
.
"""
return _split[has_maxsplit=True](self, sep, maxsplit)
return self.as_string_slice().split(sep, maxsplit)

@always_inline
fn split(self, sep: StringSlice) -> List[String]:
fn split(self, sep: StringSlice) -> List[StaticString]:
"""Split the string by a separator.
Args:
Expand All @@ -737,10 +737,10 @@ struct StringLiteral(
```
.
"""
return _split[has_maxsplit=False](self, sep, -1)
return self.as_string_slice().split(sep)

@always_inline
fn split(self, *, maxsplit: Int) -> List[String]:
fn split(self, *, maxsplit: Int) -> List[StaticString]:
"""Split the string by every Whitespace separator.
Args:
Expand All @@ -757,10 +757,10 @@ struct StringLiteral(
```
.
"""
return _split[has_maxsplit=True](self, None, maxsplit)
return self.as_string_slice().split(maxsplit=maxsplit)

@always_inline
fn split(self, sep: NoneType = None) -> List[String]:
fn split(self, sep: NoneType = None) -> List[StaticString]:
"""Split the string by every Whitespace separator.
Args:
Expand All @@ -784,7 +784,7 @@ struct StringLiteral(
```
.
"""
return _split[has_maxsplit=False](self, None, -1)
return self.as_string_slice().split()

fn splitlines(self, keepends: Bool = False) -> List[String]:
"""Split the string literal at line boundaries. This corresponds to Python's
Expand Down
24 changes: 16 additions & 8 deletions stdlib/src/collections/string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -1747,7 +1747,9 @@ struct String(
return self.as_string_slice().isspace()

@always_inline
fn split(self, sep: StringSlice, maxsplit: Int) -> List[String]:
fn split(
ref self, sep: StringSlice, maxsplit: Int
) -> List[StringSlice[__origin_of(self)]]:
"""Split the string by a separator.
Args:
Expand All @@ -1768,10 +1770,12 @@ struct String(
```
.
"""
return _split[has_maxsplit=True](self, sep, maxsplit)
return self.as_string_slice().split(sep, maxsplit)

@always_inline
fn split(self, sep: StringSlice) -> List[String]:
fn split(
ref self, sep: StringSlice
) -> List[StringSlice[__origin_of(self)]]:
"""Split the string by a separator.
Args:
Expand All @@ -1793,10 +1797,12 @@ struct String(
```
.
"""
return _split[has_maxsplit=False](self, sep, -1)
return self.as_string_slice().split(self, sep)

@always_inline
fn split(self, *, maxsplit: Int) -> List[String]:
fn split(
ref self, *, maxsplit: Int
) -> List[StringSlice[__origin_of(self)]]:
"""Split the string by every Whitespace separator.
Args:
Expand All @@ -1813,10 +1819,12 @@ struct String(
```
.
"""
return _split[has_maxsplit=True](self, None, maxsplit)
return self.as_string_slice().split(self, maxsplit=maxsplit)

@always_inline
fn split(self, sep: NoneType = None) -> List[String]:
fn split(
ref self, sep: NoneType = None
) -> List[StringSlice[__origin_of(self)]]:
"""Split the string by every Whitespace separator.
Args:
Expand All @@ -1840,7 +1848,7 @@ struct String(
```
.
"""
return _split[has_maxsplit=False](self, None, -1)
return self.as_string_slice().split()

fn splitlines(self, keepends: Bool = False) -> List[String]:
"""Split the string at line boundaries. This corresponds to Python's
Expand Down
8 changes: 5 additions & 3 deletions stdlib/src/memory/span.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@ from sys.info import simdwidthof


trait AsBytes:
"""The `AsBytes` trait denotes a type that can be returned as a byte span.
"""
The `AsBytes` trait denotes a type that can be returned as an immutable byte
span.
"""

fn as_bytes(ref self) -> Span[Byte, __origin_of(self)]:
"""Returns a contiguous slice of bytes.
"""Returns a contiguous slice of the bytes owned by this string.
Returns:
A contiguous slice pointing to bytes.
A contiguous slice pointing to the bytes owned by this string.
Notes:
This does not include the trailing null terminator.
Expand Down
132 changes: 29 additions & 103 deletions stdlib/src/utils/string_slice.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ fn _count_utf8_continuation_bytes(span: Span[Byte]) -> Int:
return amnt


@always_inline
fn _unicode_codepoint_utf8_byte_length(c: Int) -> Int:
debug_assert(
0 <= c <= 0x10FFFF, "Value: ", c, " is not a valid Unicode code point"
Expand Down Expand Up @@ -113,7 +112,6 @@ fn _shift_unicode_to_utf8(ptr: UnsafePointer[UInt8], c: Int, num_bytes: Int):
ptr[i] = ((c >> shift) & 0b0011_1111) | 0b1000_0000


@always_inline("nodebug")
fn _utf8_byte_type(b: SIMD[DType.uint8, _], /) -> __type_of(b):
"""UTF-8 byte type.
Expand Down Expand Up @@ -756,10 +754,10 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](

@always_inline
fn as_bytes(self) -> Span[Byte, origin]:
"""Returns a contiguous slice of bytes.
"""Get the sequence of encoded bytes of the underlying string.
Returns:
A contiguous slice pointing to bytes.
A slice containing the underlying sequence of encoded bytes.
Notes:
This does not include the trailing null terminator.
Expand Down Expand Up @@ -909,7 +907,7 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
"""
return _FormatCurlyEntry.format(self, args)

fn find(self, substr: StringSlice, start: Int = 0) -> Int:
fn find(ref self, substr: StringSlice, start: Int = 0) -> Int:
"""Finds the offset of the first occurrence of `substr` starting at
`start`. If not found, returns `-1`.
Expand All @@ -920,28 +918,27 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
Returns:
The offset of `substr` relative to the beginning of the string.
"""

var sub = substr.unsafe_ptr()
var sub_len = substr.byte_length()
if sub_len == 0:
if not substr:
return 0

var s_span = self.as_bytes()
if len(s_span) < sub_len + start:
if self.byte_length() < substr.byte_length() + start:
return -1

# The substring to search within, offset from the beginning if `start`
# is positive, and offset from the end if `start` is negative.
var haystack = self._from_start(start).as_bytes()
var haystack_str = self._from_start(start)

var loc = stringref._memmem(
haystack.unsafe_ptr(), len(haystack), sub, sub_len
haystack_str.unsafe_ptr(),
haystack_str.byte_length(),
substr.unsafe_ptr(),
substr.byte_length(),
)

if not loc:
return -1

return int(loc) - int(s_span.unsafe_ptr())
return int(loc) - int(self.unsafe_ptr())

fn rfind(self, substr: StringSlice, start: Int = 0) -> Int:
"""Finds the offset of the last occurrence of `substr` starting at
Expand Down Expand Up @@ -1017,16 +1014,9 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
return self.byte_length() != 0

@always_inline
fn split[
O: ImmutableOrigin, //
](self: StringSlice[O], sep: StringSlice, maxsplit: Int) -> List[
StringSlice[O]
]:
fn split(self, sep: StringSlice, maxsplit: Int) -> List[Self]:
"""Split the string by a separator.
Parameters:
O: The immutable origin.
Args:
sep: The string to split on.
maxsplit: The maximum amount of items to split from String.
Expand All @@ -1048,14 +1038,9 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
return _split[has_maxsplit=True](self, sep, maxsplit)

@always_inline
fn split[
O: ImmutableOrigin, //
](self: StringSlice[O], sep: StringSlice) -> List[StringSlice[O]]:
fn split(self, sep: StringSlice) -> List[Self]:
"""Split the string by a separator.
Parameters:
O: The immutable origin.
Args:
sep: The string to split on.
Expand All @@ -1078,14 +1063,9 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
return _split[has_maxsplit=False](self, sep, -1)

@always_inline
fn split[
O: ImmutableOrigin
](self: StringSlice[O], *, maxsplit: Int) -> List[StringSlice[O]]:
fn split(self, *, maxsplit: Int) -> List[Self]:
"""Split the string by every Whitespace separator.
Parameters:
O: The immutable origin.
Args:
maxsplit: The maximum amount of items to split from String.
Expand All @@ -1103,14 +1083,9 @@ struct StringSlice[mut: Bool, //, origin: Origin[mut]](
return _split[has_maxsplit=True](self, None, maxsplit)

@always_inline
fn split[
O: ImmutableOrigin
](self: StringSlice[O], sep: NoneType = None) -> List[StringSlice[O]]:
fn split(self, sep: NoneType = None) -> List[Self]:
"""Split the string by every Whitespace separator.
Parameters:
O: The immutable origin.
Args:
sep: None.
Expand Down Expand Up @@ -1236,15 +1211,15 @@ fn _to_string_list[
len_fn: fn (T) -> Int,
unsafe_ptr_fn: fn (T) -> UnsafePointer[Byte],
](items: List[T]) -> List[String]:
var i_len = len(items)
var i_ptr = items.unsafe_ptr()
var out_ptr = UnsafePointer[String].alloc(i_len)
i_len = len(items)
i_ptr = items.unsafe_ptr()
out_ptr = UnsafePointer[String].alloc(i_len)

for i in range(i_len):
var og_len = len_fn(i_ptr[i])
var f_len = og_len + 1 # null terminator
var p = UnsafePointer[Byte].alloc(f_len)
var og_ptr = unsafe_ptr_fn(i_ptr[i])
og_len = len_fn(i_ptr[i])
f_len = og_len + 1 # null terminator
p = UnsafePointer[Byte].alloc(f_len)
og_ptr = unsafe_ptr_fn(i_ptr[i])
memcpy(p, og_ptr, og_len)
p[og_len] = 0 # null terminator
buf = String._buffer_type(ptr=p, length=f_len, capacity=f_len)
Expand Down Expand Up @@ -1340,62 +1315,8 @@ fn _is_newline_char[
return False


@always_inline
fn _split[
has_maxsplit: Bool
](src_str: String, sep: NoneType, maxsplit: Int) -> List[String]:
return _to_string_list(
_split_impl[has_maxsplit](src_str.as_string_slice(), maxsplit)
)


@always_inline
fn _split[
has_maxsplit: Bool
](src_str: String, sep: StringSlice, maxsplit: Int) -> List[String]:
return _to_string_list(
_split_impl[has_maxsplit](src_str.as_string_slice(), sep, maxsplit)
)


@always_inline
fn _split[
has_maxsplit: Bool
](src_str: StringLiteral, sep: NoneType, maxsplit: Int) -> List[String]:
return _to_string_list(
_split_impl[has_maxsplit](src_str.as_string_slice(), maxsplit)
)


@always_inline
fn _split[
has_maxsplit: Bool
](src_str: StringLiteral, sep: StringSlice, maxsplit: Int) -> List[String]:
return _to_string_list(
_split_impl[has_maxsplit](src_str.as_string_slice(), sep, maxsplit)
)


@always_inline
fn _split[
has_maxsplit: Bool
](src_str: StringSlice, sep: NoneType, maxsplit: Int) -> List[
__type_of(src_str)
]:
return _split_impl[has_maxsplit](src_str, maxsplit)


@always_inline
fn _split[
has_maxsplit: Bool
](src_str: StringSlice, sep: StringSlice, maxsplit: Int) -> List[
__type_of(src_str)
]:
return _split_impl[has_maxsplit](src_str, sep, maxsplit)


fn _split_impl[
has_maxsplit: Bool
](
src_str: StringSlice,
sep: StringSlice,
Expand Down Expand Up @@ -1448,9 +1369,14 @@ fn _split_impl[
lhs = rhs + sep_len


fn _split_impl[
fn _split[
has_maxsplit: Bool
](src_str: StringSlice, maxsplit: Int, out output: List[__type_of(src_str)]):
](
src_str: StringSlice,
sep: NoneType,
maxsplit: Int,
out output: List[__type_of(src_str)],
):
alias S = __type_of(src_str)
alias O = __type_of(src_str).origin
alias prealloc = 32 # guessing, Python's implementation uses 12
Expand Down
Loading

0 comments on commit ba2c415

Please sign in to comment.