@@ -17,12 +17,41 @@ import (
17
17
"github.com/ipld/go-ipld-prime/traversal"
18
18
)
19
19
20
+ // TraverseResumer allows resuming a progress from a previously encountered path in the selector.
21
+ type TraverseResumer interface {
22
+ RewindToPath (from datamodel.Path ) error
23
+ RewindToOffset (offset uint64 ) error
24
+ Position () uint64
25
+ }
26
+
27
+ // TraversalResumerPathState tracks a traversal state for the purpose of
28
+ // building a CAR. For each block in the CAR it tracks the path to that block,
29
+ // the Link of the block and where in the CAR the block is located.
30
+ //
31
+ // A TraversalResumerPathState shared across multiple traversals using the same
32
+ // selector and DAG will yield the same state. This allows us to resume at
33
+ // arbitrary points within the DAG and load the minimal additional blocks
34
+ // required to resume the traversal at that point.
35
+ type TraversalResumerPathState interface {
36
+ AddPath (path []datamodel.PathSegment , link datamodel.Link , atOffset uint64 )
37
+ GetLinks (root datamodel.Path ) []datamodel.Link
38
+ GetOffsetAfter (root datamodel.Path ) (uint64 , error )
39
+ }
40
+
20
41
type pathNode struct {
21
42
link datamodel.Link
22
43
offset uint64
23
44
children map [datamodel.PathSegment ]* pathNode
24
45
}
25
46
47
+ // NewTraversalResumerPathState creates a new TraversalResumerPathState.
48
+ //
49
+ // Note that the TraversalResumerPathState returned by this factory is not
50
+ // thread-safe.
51
+ func NewTraversalResumerPathState () TraversalResumerPathState {
52
+ return newPath (nil , 0 )
53
+ }
54
+
26
55
func newPath (link datamodel.Link , at uint64 ) * pathNode {
27
56
return & pathNode {
28
57
link : link ,
@@ -31,15 +60,15 @@ func newPath(link datamodel.Link, at uint64) *pathNode {
31
60
}
32
61
}
33
62
34
- func (pn pathNode ) addPath (p []datamodel.PathSegment , link datamodel.Link , at uint64 ) {
63
+ func (pn pathNode ) AddPath (p []datamodel.PathSegment , link datamodel.Link , atOffset uint64 ) {
35
64
if len (p ) == 0 {
36
65
return
37
66
}
38
67
if _ , ok := pn .children [p [0 ]]; ! ok {
39
- child := newPath (link , at )
68
+ child := newPath (link , atOffset )
40
69
pn .children [p [0 ]] = child
41
70
}
42
- pn .children [p [0 ]].addPath (p [1 :], link , at )
71
+ pn .children [p [0 ]].AddPath (p [1 :], link , atOffset )
43
72
}
44
73
45
74
func (pn pathNode ) allLinks () []datamodel.Link {
@@ -57,7 +86,7 @@ func (pn pathNode) allLinks() []datamodel.Link {
57
86
}
58
87
59
88
// GetLinks returns the links in the tree rooted at 'root'
60
- func (pn pathNode ) getLinks (root datamodel.Path ) []datamodel.Link {
89
+ func (pn pathNode ) GetLinks (root datamodel.Path ) []datamodel.Link {
61
90
segs := root .Segments ()
62
91
switch len (segs ) {
63
92
case 0 :
@@ -80,12 +109,12 @@ func (pn pathNode) getLinks(root datamodel.Path) []datamodel.Link {
80
109
// base case 2: not registered sub-path.
81
110
return []datamodel.Link {}
82
111
}
83
- return pn .children [next ].getLinks (datamodel .NewPathNocopy (segs [1 :]))
112
+ return pn .children [next ].GetLinks (datamodel .NewPathNocopy (segs [1 :]))
84
113
}
85
114
86
115
var errInvalid = fmt .Errorf ("invalid path" )
87
116
88
- func (pn pathNode ) offsetAfter (root datamodel.Path ) (uint64 , error ) {
117
+ func (pn pathNode ) GetOffsetAfter (root datamodel.Path ) (uint64 , error ) {
89
118
// we look for offset of next sibling.
90
119
// if no next sibling recurse up the path segments until we find a next sibling.
91
120
segs := root .Segments ()
@@ -100,7 +129,7 @@ func (pn pathNode) offsetAfter(root datamodel.Path) (uint64, error) {
100
129
closest := chld .offset
101
130
// try recursive path
102
131
if len (segs ) > 1 {
103
- co , err := chld .offsetAfter (datamodel .NewPathNocopy (segs [1 :]))
132
+ co , err := chld .GetOffsetAfter (datamodel .NewPathNocopy (segs [1 :]))
104
133
if err == nil {
105
134
return co , err
106
135
}
@@ -121,35 +150,28 @@ func (pn pathNode) offsetAfter(root datamodel.Path) (uint64, error) {
121
150
return 0 , errInvalid
122
151
}
123
152
124
- // TraverseResumer allows resuming a progress from a previously encountered path in the selector.
125
- type TraverseResumer interface {
126
- RewindToPath (from datamodel.Path ) error
127
- RewindToOffset (offset uint64 ) error
128
- Position () uint64
129
- }
130
-
131
153
type traversalState struct {
132
154
wrappedLinksystem * linking.LinkSystem
133
155
lsCounter * loader.Counter
134
- blockNumber int
135
- pathOrder map [int ]datamodel.Path
136
- pathTree * pathNode
156
+ pathTree TraversalResumerPathState
137
157
rewindPathTarget * datamodel.Path
138
158
rewindOffsetTarget uint64
139
159
pendingBlockStart uint64 // on rewinds, we store where the counter was in order to know the length of the last read block.
140
160
progress * traversal.Progress
141
161
}
142
162
163
+ var _ TraverseResumer = (* traversalState )(nil )
164
+
143
165
func (ts * traversalState ) RewindToPath (from datamodel.Path ) error {
144
166
if ts .progress == nil {
145
167
return nil
146
168
}
147
169
// reset progress and traverse until target.
148
170
ts .progress .SeenLinks = make (map [datamodel.Link ]struct {})
149
- ts .blockNumber = 0
150
171
ts .pendingBlockStart = ts .lsCounter .Size ()
151
172
ts .lsCounter .TotalRead = 0
152
173
ts .rewindPathTarget = & from
174
+ ts .rewindOffsetTarget = 0
153
175
return nil
154
176
}
155
177
@@ -163,10 +185,10 @@ func (ts *traversalState) RewindToOffset(offset uint64) error {
163
185
}
164
186
// reset progress and traverse until target.
165
187
ts .progress .SeenLinks = make (map [datamodel.Link ]struct {})
166
- ts .blockNumber = 0
167
188
ts .pendingBlockStart = ts .lsCounter .Size ()
168
189
ts .lsCounter .TotalRead = 0
169
190
ts .rewindOffsetTarget = offset
191
+ ts .rewindPathTarget = nil
170
192
return nil
171
193
}
172
194
@@ -177,9 +199,7 @@ func (ts *traversalState) Position() uint64 {
177
199
func (ts * traversalState ) traverse (lc linking.LinkContext , l ipld.Link ) (io.Reader , error ) {
178
200
// when not in replay mode, we track metadata
179
201
if ts .rewindPathTarget == nil && ts .rewindOffsetTarget == 0 {
180
- ts .pathOrder [ts .blockNumber ] = lc .LinkPath
181
- ts .pathTree .addPath (lc .LinkPath .Segments (), l , ts .lsCounter .Size ())
182
- ts .blockNumber ++
202
+ ts .pathTree .AddPath (lc .LinkPath .Segments (), l , ts .lsCounter .Size ())
183
203
return ts .wrappedLinksystem .StorageReadOpener (lc , l )
184
204
}
185
205
@@ -205,12 +225,12 @@ func (ts *traversalState) traverse(lc linking.LinkContext, l ipld.Link) (io.Read
205
225
break
206
226
}
207
227
if targetSegments [i ].String () != s .String () {
208
- links := ts .pathTree .getLinks (datamodel .NewPathNocopy (seg [0 : i + 1 ]))
228
+ links := ts .pathTree .GetLinks (datamodel .NewPathNocopy (seg [0 : i + 1 ]))
209
229
for _ , l := range links {
210
230
ts .progress .SeenLinks [l ] = struct {}{}
211
231
}
212
232
var err error
213
- ts .lsCounter .TotalRead , err = ts .pathTree .offsetAfter (datamodel .NewPathNocopy (seg [0 : i + 1 ]))
233
+ ts .lsCounter .TotalRead , err = ts .pathTree .GetOffsetAfter (datamodel .NewPathNocopy (seg [0 : i + 1 ]))
214
234
if err == errInvalid {
215
235
ts .lsCounter .TotalRead = ts .pendingBlockStart
216
236
} else if err != nil {
@@ -222,12 +242,12 @@ func (ts *traversalState) traverse(lc linking.LinkContext, l ipld.Link) (io.Read
222
242
}
223
243
}
224
244
if ts .rewindOffsetTarget != 0 {
225
- links := ts .pathTree .getLinks (lc .LinkPath )
245
+ links := ts .pathTree .GetLinks (lc .LinkPath )
226
246
for _ , l := range links {
227
247
ts .progress .SeenLinks [l ] = struct {}{}
228
248
}
229
249
var err error
230
- ts .lsCounter .TotalRead , err = ts .pathTree .offsetAfter (lc .LinkPath )
250
+ ts .lsCounter .TotalRead , err = ts .pathTree .GetOffsetAfter (lc .LinkPath )
231
251
if err == errInvalid {
232
252
ts .lsCounter .TotalRead = ts .pendingBlockStart
233
253
} else if err != nil {
@@ -243,13 +263,12 @@ func (ts *traversalState) traverse(lc linking.LinkContext, l ipld.Link) (io.Read
243
263
// WithTraversingLinksystem extends a progress for traversal such that it can
244
264
// subsequently resume and perform subsets of the walk efficiently from
245
265
// an arbitrary position within the selector traversal.
246
- func WithTraversingLinksystem (p * traversal.Progress ) (TraverseResumer , error ) {
266
+ func WithTraversingLinksystem (p * traversal.Progress , pathState TraversalResumerPathState ) (TraverseResumer , error ) {
247
267
wls , ctr := loader .CountingLinkSystem (p .Cfg .LinkSystem )
248
268
ts := & traversalState {
249
269
wrappedLinksystem : & wls ,
250
270
lsCounter : ctr .(* loader.Counter ),
251
- pathOrder : make (map [int ]datamodel.Path ),
252
- pathTree : newPath (nil , 0 ),
271
+ pathTree : pathState ,
253
272
progress : p ,
254
273
}
255
274
p .Cfg .LinkSystem .StorageReadOpener = ts .traverse
0 commit comments