Skip to content

Commit 090a1d4

Browse files
committed
Add extracting APIs that reset span bounds
This change adds three `ParserSpan` extraction APIs — `extract(byteCount:)`, `extract(objectStride:objectCount:)`, and `extractRemaining()` — that yield new `ParserSpan` instances whose internal span boundaries are shrunk to fit, instead of just insetting the boundaries at the `ParserSpan` level. An extracted span can be passed off to a parsing function without worry that an absolute seek will go beyond the visible boundaries of the span.
1 parent 7c3165b commit 090a1d4

File tree

3 files changed

+313
-3
lines changed

3 files changed

+313
-3
lines changed

Sources/BinaryParsing/Parser Types/ParserSpan.swift

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ public struct ParserSpan: ~Escapable, ~Copyable {
6969
@inlinable
7070
@_lifetime(copy self)
7171
borrowing get {
72-
_bytes._extracting(droppingFirst: _lowerBound)._extracting(first: count)
72+
unsafe _bytes.extracting(
73+
unchecked: Range(uncheckedBounds: (_lowerBound, _upperBound)))
7374
}
7475
}
7576
}
@@ -130,6 +131,12 @@ extension ParserSpan {
130131
fromUncheckedByteOffset: _lowerBound &+ i,
131132
as: UInt8.self)
132133
}
134+
135+
@usableFromInline
136+
@_lifetime(copy self)
137+
consuming func extracted() -> ParserSpan {
138+
Self(bytes)
139+
}
133140
}
134141

135142
extension ParserSpan {

Sources/BinaryParsing/Parser Types/Slicing.swift

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12+
// MARK: ParserSpan Slicing
13+
1214
extension ParserSpan {
1315
/// Returns a new parser span covering the specified number of bytes from the
1416
/// start of this parser span, shrinking this parser span by the same amount.
@@ -35,7 +37,7 @@ extension ParserSpan {
3537
throw ParsingError(status: .invalidValue, location: startPosition)
3638
}
3739
guard count >= byteCount else {
38-
throw ParsingError(status: .invalidValue, location: startPosition)
40+
throw ParsingError(status: .insufficientData, location: startPosition)
3941
}
4042
return divide(atOffset: byteCount)
4143
}
@@ -74,9 +76,10 @@ extension ParserSpan {
7476
}
7577
return try _divide(atByteOffset: byteCount)
7678
}
77-
7879
}
7980

81+
// MARK: Range Slicing
82+
8083
extension ParserSpan {
8184
/// Returns a parser range covering the specified number of bytes from the
8285
/// start of this parser span, shrinking this parser span by the same amount.
@@ -154,6 +157,8 @@ extension ParserSpan {
154157
}
155158
}
156159

160+
// MARK: UTF8Span
161+
157162
extension ParserSpan {
158163
/// Returns a `UTF8Span` covering the specified number of bytes from the
159164
/// start of this parser span, shrinking this parser span by the same amount.
@@ -188,3 +193,89 @@ extension ParserSpan {
188193
}
189194
}
190195
}
196+
197+
// MARK: Extracting
198+
199+
extension ParserSpan {
200+
/// Extracts and returns a new parser span covering the specified number of
201+
/// bytes from the start of this parser span, shrinking this parser span by
202+
/// the same amount.
203+
///
204+
/// Use `extract(byteCount:)` to retrieve a separate span for a parsing
205+
/// sub-task when you know the size of the task. For example, each chunk in
206+
/// the PNG format begins with an identifier and the size of the chunk, in
207+
/// bytes. A PNG chunk parser could use this method to slice the correct size
208+
/// for each chunk, and limit parsing to within the resulting span.
209+
///
210+
/// An _extracted_ parser span doesn't retain information about the bounds of
211+
/// the original span, unlike a _slice_. If you need to seek beyond the
212+
/// immediate bounds of the returned span, use the ``sliceSpan(byteCount:)``
213+
/// method instead.
214+
///
215+
/// - Parameter byteCount: The number of bytes to include in the resulting
216+
/// span. `byteCount` must be non-negative, and less than or equal to the
217+
/// number of bytes remaining in the span.
218+
/// - Returns: A new parser span covering `byteCount` bytes. The returned
219+
/// parser span has a `startPosition` of zero and an `endPosition` equal
220+
/// to `byteCount`.
221+
/// - Throws: A `ParsingError` if `byteCount` cannot be represented as an
222+
/// `Int`, if it's negative, or if there aren't enough bytes in the
223+
/// original span.
224+
@inlinable
225+
@_lifetime(copy self)
226+
public mutating func extract(byteCount: some FixedWidthInteger)
227+
throws(ParsingError) -> ParserSpan
228+
{
229+
try sliceSpan(byteCount: byteCount).extracted()
230+
}
231+
232+
/// Extracts and returns a new parser span covering the specified number of
233+
/// bytes calculated as the product of object count and stride from the start
234+
/// of this parser span, shrinking this parser span by the same amount.
235+
///
236+
/// Use `extract(objectStride:objectCount:)` when you need to retrieve a
237+
/// span for parsing a collection of fixed-size objects. This is particularly
238+
/// useful when parsing arrays of binary data with known element sizes. For
239+
/// example, if you're parsing an array of 4-byte integers and know there are
240+
/// 10 elements, you can use:
241+
///
242+
/// let intArraySpan = try span.extract(objectStride: 4, objectCount: 10)
243+
///
244+
/// An _extracted_ parser span doesn't retain information about the bounds of
245+
/// the original span, unlike a _slice_. If you need to seek beyond the
246+
/// immediate bounds of the returned span, use the ``sliceSpan(objectStride:objectCount:)``
247+
/// method instead.
248+
///
249+
/// - Parameters:
250+
/// - objectStride: The size in bytes of each object in the collection.
251+
/// - objectCount: The number of objects to include in the resulting span.
252+
/// - Returns: A parser span covering `objectStride * objectCount` bytes,
253+
/// with a `startPosition` of zero.
254+
/// - Throws: A `ParsingError` if either `objectStride` or `objectCount`
255+
/// cannot be represented as an `Int`, if their product would overflow, or
256+
/// if the product is not in the range `0...count`.
257+
@inlinable
258+
@_lifetime(copy self)
259+
public mutating func extract(
260+
objectStride: some FixedWidthInteger,
261+
objectCount: some FixedWidthInteger
262+
) throws(ParsingError) -> ParserSpan {
263+
try sliceSpan(objectStride: objectStride, objectCount: objectCount)
264+
.extracted()
265+
}
266+
267+
/// Extracts and returns a parser span covering the remaining bytes in this
268+
/// parser span.
269+
///
270+
/// An _extracted_ parser span doesn't retain information about the bounds of
271+
/// the original span, unlike a _slice_.
272+
///
273+
/// - Returns: A parser span covering the rest of the memory represented
274+
/// by this parser span, with a `startPosition` of zero and `endPosition`
275+
/// equal to the remaining number of bytes.
276+
@inlinable
277+
@_lifetime(copy self)
278+
public mutating func extractRemaining() -> ParserSpan {
279+
divide(atOffset: self.count).extracted()
280+
}
281+
}
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift Binary Parsing open source project
4+
//
5+
// Copyright (c) 2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
import BinaryParsing
13+
import Testing
14+
15+
private let buffer: [UInt8] = [
16+
0, 1, 0, 2, 0, 3, 0, 4,
17+
0, 5, 0, 6, 0, 7, 0, 0,
18+
]
19+
20+
private let emptyBuffer: [UInt8] = []
21+
22+
struct ExtractingTests {
23+
@Test func extractByteCount() throws {
24+
try buffer.withParserSpan { input in
25+
var firstSpan = try input.extract(byteCount: 4)
26+
#expect(firstSpan.startPosition == 0)
27+
#expect(firstSpan.count == 4)
28+
29+
// Verify contents of the extracted span
30+
let firstValue = try UInt16(parsingBigEndian: &firstSpan)
31+
let secondValue = try UInt16(parsingBigEndian: &firstSpan)
32+
#expect(firstValue == 1)
33+
#expect(secondValue == 2)
34+
#expect(firstSpan.count == 0)
35+
36+
// Input position should advance
37+
#expect(input.startPosition == 4)
38+
#expect(input.count == 12)
39+
40+
// Extract another span after advancing the input
41+
_ = try input.seek(toRelativeOffset: 2)
42+
var secondSpan = try input.extract(byteCount: 4)
43+
#expect(secondSpan.startPosition == 0) // Extracted span starts at 0
44+
#expect(secondSpan.count == 4)
45+
46+
// Verify the content of the second extracted span
47+
let thirdValue = try UInt16(parsingBigEndian: &secondSpan)
48+
let fourthValue = try UInt16(parsingBigEndian: &secondSpan)
49+
#expect(thirdValue == 4)
50+
#expect(fourthValue == 5)
51+
52+
// Try extracting with zero byteCount
53+
let emptySpan = try input.extract(byteCount: 0)
54+
#expect(emptySpan.count == 0)
55+
#expect(emptySpan.startPosition == 0)
56+
57+
// Attempt to extract more than available
58+
#expect(throws: ParsingError.self) {
59+
_ = try input.extract(byteCount: 11)
60+
}
61+
62+
// Try with negative byteCount
63+
#expect(throws: ParsingError.self) {
64+
_ = try input.extract(byteCount: -1)
65+
}
66+
}
67+
68+
// Test with empty buffer
69+
try emptyBuffer.withParserSpan { input in
70+
// Zero byteCount should succeed
71+
let emptySpan = try input.extract(byteCount: 0)
72+
#expect(emptySpan.count == 0)
73+
#expect(emptySpan.startPosition == 0)
74+
75+
// Any positive byteCount should fail
76+
#expect(throws: ParsingError.self) {
77+
_ = try input.extract(byteCount: 1)
78+
}
79+
}
80+
}
81+
82+
@Test func extractObjectCount() throws {
83+
try buffer.withParserSpan { input in
84+
// 2 objects of 2 bytes each
85+
var firstSpan = try input.extract(objectStride: 2, objectCount: 2)
86+
#expect(firstSpan.startPosition == 0)
87+
#expect(firstSpan.count == 4)
88+
89+
// Verify contents of the extracted span
90+
let firstValue = try UInt16(parsingBigEndian: &firstSpan)
91+
let secondValue = try UInt16(parsingBigEndian: &firstSpan)
92+
#expect(firstValue == 1)
93+
#expect(secondValue == 2)
94+
#expect(firstSpan.count == 0)
95+
96+
// 1 object of 4 bytes
97+
var secondSpan = try input.extract(objectStride: 4, objectCount: 1)
98+
#expect(secondSpan.startPosition == 0) // Extracted spans start at 0
99+
#expect(secondSpan.count == 4)
100+
101+
// Verify contents of the second extract
102+
let thirdValue = try UInt32(parsingBigEndian: &secondSpan)
103+
#expect(thirdValue == 0x0003_0004)
104+
#expect(secondSpan.count == 0)
105+
106+
// Input position should advance
107+
#expect(input.startPosition == 8)
108+
#expect(input.count == 8)
109+
110+
// objectCount == 0 (should create an empty extracted span)
111+
let emptySpan = try input.extract(objectStride: 2, objectCount: 0)
112+
#expect(emptySpan.count == 0)
113+
#expect(emptySpan.startPosition == 0)
114+
115+
// objectStride == 0 (should create an empty extracted span)
116+
let emptySpan2 = try input.extract(objectStride: 0, objectCount: 5)
117+
#expect(emptySpan2.count == 0)
118+
#expect(emptySpan2.startPosition == 0)
119+
120+
#expect(throws: ParsingError.self) {
121+
_ = try input.extract(objectStride: 3, objectCount: 3)
122+
}
123+
#expect(input.startPosition == 8)
124+
#expect(throws: ParsingError.self) {
125+
_ = try input.extract(objectStride: -1, objectCount: 2)
126+
}
127+
#expect(throws: ParsingError.self) {
128+
_ = try input.extract(objectStride: 2, objectCount: -1)
129+
}
130+
#expect(throws: ParsingError.self) {
131+
_ = try input.extract(objectStride: Int.max, objectCount: 2)
132+
}
133+
}
134+
135+
// Test with empty buffer
136+
try emptyBuffer.withParserSpan { input in
137+
let emptySpan = try input.extract(objectStride: 4, objectCount: 0)
138+
#expect(emptySpan.count == 0)
139+
#expect(emptySpan.startPosition == 0)
140+
141+
#expect(throws: ParsingError.self) {
142+
_ = try input.extract(objectStride: 1, objectCount: 1)
143+
}
144+
}
145+
}
146+
147+
@Test func extractRemaining() throws {
148+
try buffer.withParserSpan { input in
149+
// Advance to a position within the buffer
150+
try input.seek(toRelativeOffset: 6)
151+
152+
var remainingSpan = input.extractRemaining()
153+
#expect(remainingSpan.startPosition == 0) // Extracted spans start at 0
154+
#expect(remainingSpan.count == 10) // 16 - 6 = 10 bytes remaining
155+
156+
// Verify that the original input has been fully consumed
157+
#expect(input.count == 0)
158+
159+
// Verify we can parse the extracted remaining data
160+
let value1 = try UInt16(parsingBigEndian: &remainingSpan)
161+
let value2 = try UInt16(parsingBigEndian: &remainingSpan)
162+
#expect(value1 == 4)
163+
#expect(value2 == 5)
164+
#expect(remainingSpan.count == 6)
165+
166+
// Reset to beginning and extract all
167+
try input.seek(toAbsoluteOffset: 0)
168+
var fullSpan = input.extractRemaining()
169+
#expect(fullSpan.startPosition == 0)
170+
#expect(fullSpan.count == 16)
171+
#expect(input.count == 0)
172+
173+
// Parse a few values to verify it contains the full buffer data
174+
let fullValue1 = try UInt16(parsingBigEndian: &fullSpan)
175+
let fullValue2 = try UInt16(parsingBigEndian: &fullSpan)
176+
#expect(fullValue1 == 1)
177+
#expect(fullValue2 == 2)
178+
}
179+
180+
// Test with empty buffer
181+
emptyBuffer.withParserSpan { input in
182+
let emptySpan = input.extractRemaining()
183+
#expect(emptySpan.startPosition == 0)
184+
#expect(emptySpan.count == 0)
185+
#expect(input.count == 0)
186+
}
187+
}
188+
189+
@Test func extractSliceSemantics() throws {
190+
try buffer.withParserSpan { input in
191+
// Create slice and extract of the same data
192+
try input.seek(toAbsoluteOffset: 4)
193+
var slicedSpan = try input.sliceSpan(byteCount: 4)
194+
try input.seek(toAbsoluteOffset: 4) // Go back to same position
195+
var extractedSpan = try input.extract(byteCount: 4)
196+
197+
// Both should have same count...
198+
#expect(slicedSpan.count == 4)
199+
#expect(extractedSpan.count == 4)
200+
201+
// ...but different start positions
202+
#expect(slicedSpan.startPosition == 4)
203+
#expect(extractedSpan.startPosition == 0)
204+
205+
// Both should parse the same values
206+
let sliceValue = try UInt32(parsingBigEndian: &slicedSpan)
207+
let extractValue = try UInt32(parsingBigEndian: &extractedSpan)
208+
#expect(sliceValue == 0x0003_0004)
209+
#expect(sliceValue == extractValue)
210+
}
211+
}
212+
}

0 commit comments

Comments
 (0)