@@ -24,7 +24,7 @@ public async Task LoadSitemapAsync_SitemapDirectiveExists_LoadSitemapDirective()
2424" ;
2525 await using var stream = new MemoryStream ( Encoding . UTF8 . GetBytes ( file ) ) ;
2626
27- _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < CancellationToken > ( ) ) )
27+ _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < Func < Uri , bool > > ( ) , It . IsAny < CancellationToken > ( ) ) )
2828 . Returns ( Enumerable . Empty < UrlSetItem > ( ) . ToAsyncEnumerable ( ) ) ;
2929
3030 var robotsTxt = await _parser . ReadFromStreamAsync ( stream ) ;
@@ -37,6 +37,7 @@ public async Task LoadSitemapAsync_SitemapDirectiveExists_LoadSitemapDirective()
3737 _robotsClientMock . Verify ( callTo => callTo . LoadSitemapsAsync (
3838 new Uri ( "https://www.github.com/sitemap.xml" ) ,
3939 null ,
40+ null ,
4041 default ) , Times . Once ) ;
4142 }
4243
@@ -53,7 +54,7 @@ public async Task LoadSitemapAsync_MultipleSitemapDirectivesTopOfFile_LoadMultip
5354" ;
5455 await using var stream = new MemoryStream ( Encoding . UTF8 . GetBytes ( file ) ) ;
5556
56- _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < CancellationToken > ( ) ) )
57+ _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < Func < Uri , bool > > ( ) , It . IsAny < CancellationToken > ( ) ) )
5758 . Returns ( Enumerable . Empty < UrlSetItem > ( ) . ToAsyncEnumerable ( ) ) ;
5859
5960 var robotsTxt = await _parser . ReadFromStreamAsync ( stream ) ;
@@ -66,10 +67,12 @@ public async Task LoadSitemapAsync_MultipleSitemapDirectivesTopOfFile_LoadMultip
6667 _robotsClientMock . Verify ( callTo => callTo . LoadSitemapsAsync (
6768 new Uri ( "https://www.github.com/sitemap.xml" ) ,
6869 null ,
70+ null ,
6971 default ) , Times . Once ) ;
7072 _robotsClientMock . Verify ( callTo => callTo . LoadSitemapsAsync (
7173 new Uri ( "https://www.github.com/sitemap-2.xml" ) ,
7274 null ,
75+ null ,
7376 default ) , Times . Once ) ;
7477 }
7578
@@ -85,7 +88,7 @@ public async Task LoadSitemapAsync_MultipleSitemapDirectivesUnderUserAgent_LoadM
8588Sitemap: https://www.github.com/sitemap-2.xml" ;
8689 await using var stream = new MemoryStream ( Encoding . UTF8 . GetBytes ( file ) ) ;
8790
88- _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < CancellationToken > ( ) ) )
91+ _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < Func < Uri , bool > > ( ) , It . IsAny < CancellationToken > ( ) ) )
8992 . Returns ( Enumerable . Empty < UrlSetItem > ( ) . ToAsyncEnumerable ( ) ) ;
9093
9194 var robotsTxt = await _parser . ReadFromStreamAsync ( stream ) ;
@@ -98,10 +101,12 @@ public async Task LoadSitemapAsync_MultipleSitemapDirectivesUnderUserAgent_LoadM
98101 _robotsClientMock . Verify ( callTo => callTo . LoadSitemapsAsync (
99102 new Uri ( "https://www.github.com/sitemap.xml" ) ,
100103 null ,
104+ null ,
101105 default ) , Times . Once ) ;
102106 _robotsClientMock . Verify ( callTo => callTo . LoadSitemapsAsync (
103107 new Uri ( "https://www.github.com/sitemap-2.xml" ) ,
104108 null ,
109+ null ,
105110 default ) , Times . Once ) ;
106111 }
107112
@@ -118,7 +123,7 @@ public async Task LoadSitemapAsync_MultipleSitemapDirectives_RetrieveOneIfDuplic
118123" ;
119124 await using var stream = new MemoryStream ( Encoding . UTF8 . GetBytes ( file ) ) ;
120125
121- _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < CancellationToken > ( ) ) )
126+ _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < Func < Uri , bool > > ( ) , It . IsAny < CancellationToken > ( ) ) )
122127 . Returns ( Enumerable . Empty < UrlSetItem > ( ) . ToAsyncEnumerable ( ) ) ;
123128
124129 var robotsTxt = await _parser . ReadFromStreamAsync ( stream ) ;
@@ -131,6 +136,50 @@ public async Task LoadSitemapAsync_MultipleSitemapDirectives_RetrieveOneIfDuplic
131136 _robotsClientMock . Verify ( callTo => callTo . LoadSitemapsAsync (
132137 new Uri ( "https://www.github.com/sitemap.xml" ) ,
133138 null ,
139+ null ,
140+ default ) , Times . Once ) ;
141+ }
142+
/// <summary>
/// Given three Sitemap directives and a location filter that accepts only paths containing
/// "brands", the client is invoked exactly once for the brands sitemap (with the same filter
/// delegate) and never for the products or categories sitemaps.
/// </summary>
[Fact]
public async Task LoadSitemapAsync_MultipleSitemapDirectives_OnlyLoadDirectivesMatchingFilter()
{
    // Arrange
    Func<Uri, bool> brandsOnly = uri => uri.AbsolutePath.Contains("brands");

    var robotsFile =
@"Sitemap: https://www.github.com/sitemap-products.xml
Sitemap: https://www.github.com/sitemap-categories.xml
Sitemap: https://www.github.com/sitemap-brands.xml

User-agent: *
Disallow: /
";
    await using var robotsStream = new MemoryStream(Encoding.UTF8.GetBytes(robotsFile));

    // Any client call yields an empty sitemap so the enumeration completes immediately.
    _robotsClientMock
        .Setup(client => client.LoadSitemapsAsync(
            It.IsAny<Uri>(),
            It.IsAny<DateTime?>(),
            It.IsAny<Func<Uri, bool>>(),
            It.IsAny<CancellationToken>()))
        .Returns(Enumerable.Empty<UrlSetItem>().ToAsyncEnumerable());

    var robotsTxt = await _parser.ReadFromStreamAsync(robotsStream);

    // Act
    _ = await robotsTxt.LoadSitemapAsync(sitemapLocationFilter: brandsOnly).ToListAsync();

    // Assert
    robotsTxt.Should().NotBe(null);

    // Directives rejected by the filter must never reach the client, whatever the other arguments.
    var rejectedLocations = new[]
    {
        "https://www.github.com/sitemap-products.xml",
        "https://www.github.com/sitemap-categories.xml",
    };
    foreach (var rejected in rejectedLocations)
    {
        var rejectedUri = new Uri(rejected);
        _robotsClientMock.Verify(
            client => client.LoadSitemapsAsync(
                rejectedUri,
                It.IsAny<DateTime?>(),
                It.IsAny<Func<Uri, bool>>(),
                It.IsAny<CancellationToken>()),
            Times.Never);
    }

    // The accepted directive is loaded once, and the same filter delegate is handed to the client.
    var brandsUri = new Uri("https://www.github.com/sitemap-brands.xml");
    _robotsClientMock.Verify(
        client => client.LoadSitemapsAsync(brandsUri, null, brandsOnly, default),
        Times.Once);
}
136185
@@ -146,7 +195,7 @@ public async Task LoadSitemapAsync_SitemapDirectiveExists_PassModifiedDate()
146195" ;
147196 await using var stream = new MemoryStream ( Encoding . UTF8 . GetBytes ( file ) ) ;
148197
149- _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < CancellationToken > ( ) ) )
198+ _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < Func < Uri , bool > > ( ) , It . IsAny < CancellationToken > ( ) ) )
150199 . Returns ( Enumerable . Empty < UrlSetItem > ( ) . ToAsyncEnumerable ( ) ) ;
151200
152201 var modifiedDate = new DateTime ( 2023 , 01 , 01 ) ;
@@ -161,6 +210,38 @@ public async Task LoadSitemapAsync_SitemapDirectiveExists_PassModifiedDate()
161210 _robotsClientMock . Verify ( callTo => callTo . LoadSitemapsAsync (
162211 It . IsAny < Uri > ( ) ,
163212 modifiedDate ,
213+ null ,
214+ default ) , Times . Once ) ;
215+ }
216+
/// <summary>
/// Given a robots.txt with a single Sitemap directive, the filter delegate supplied to
/// <c>LoadSitemapAsync</c> is passed to the client's <c>LoadSitemapsAsync</c> exactly once,
/// together with a null modified date and the default cancellation token.
/// </summary>
[Fact]
public async Task LoadSitemapAsync_SitemapDirectiveExists_PassSitemapLocationFilter()
{
    // Arrange
    Func<Uri, bool> locationFilter = uri =>
    {
        var path = uri.AbsolutePath;
        return path == "/sitemap.xml" || path.Contains("product");
    };

    var robotsFile =
@"Sitemap: https://www.github.com/sitemap.xml

User-agent: *
Disallow: /
";
    await using var robotsStream = new MemoryStream(Encoding.UTF8.GetBytes(robotsFile));

    // Any client call yields an empty sitemap so the enumeration completes immediately.
    _robotsClientMock
        .Setup(client => client.LoadSitemapsAsync(
            It.IsAny<Uri>(),
            It.IsAny<DateTime?>(),
            It.IsAny<Func<Uri, bool>>(),
            It.IsAny<CancellationToken>()))
        .Returns(Enumerable.Empty<UrlSetItem>().ToAsyncEnumerable());

    var robotsTxt = await _parser.ReadFromStreamAsync(robotsStream);

    // Act
    _ = await robotsTxt.LoadSitemapAsync(sitemapLocationFilter: locationFilter).ToListAsync();

    // Assert
    robotsTxt.Should().NotBe(null);
    _robotsClientMock.Verify(
        client => client.LoadSitemapsAsync(
            It.IsAny<Uri>(),
            null,
            locationFilter,
            default),
        Times.Once);
}
166247
@@ -176,7 +257,7 @@ public async Task LoadSitemapAsync_SitemapDirectiveExists_PassCancellationToken(
176257" ;
177258 await using var stream = new MemoryStream ( Encoding . UTF8 . GetBytes ( file ) ) ;
178259
179- _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < CancellationToken > ( ) ) )
260+ _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < Func < Uri , bool > > ( ) , It . IsAny < CancellationToken > ( ) ) )
180261 . Returns ( Enumerable . Empty < UrlSetItem > ( ) . ToAsyncEnumerable ( ) ) ;
181262
182263 using var cancellationTokenSource = new CancellationTokenSource ( ) ;
@@ -192,6 +273,7 @@ public async Task LoadSitemapAsync_SitemapDirectiveExists_PassCancellationToken(
192273 _robotsClientMock . Verify ( callTo => callTo . LoadSitemapsAsync (
193274 It . IsAny < Uri > ( ) ,
194275 null ,
276+ null ,
195277 cancellationToken ) , Times . Once ) ;
196278 }
197279
@@ -205,7 +287,7 @@ public async Task LoadSitemapAsync_NoSitemapDirective_TryLoadDefaultSitemapIfNon
205287" ;
206288 await using var stream = new MemoryStream ( Encoding . UTF8 . GetBytes ( file ) ) ;
207289
208- _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < CancellationToken > ( ) ) )
290+ _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < Func < Uri , bool > > ( ) , It . IsAny < CancellationToken > ( ) ) )
209291 . Returns ( Enumerable . Empty < UrlSetItem > ( ) . ToAsyncEnumerable ( ) ) ;
210292
211293 var robotsTxt = await _parser . ReadFromStreamAsync ( stream ) ;
@@ -218,6 +300,7 @@ public async Task LoadSitemapAsync_NoSitemapDirective_TryLoadDefaultSitemapIfNon
218300 _robotsClientMock . Verify ( callTo => callTo . LoadSitemapsAsync (
219301 new Uri ( "https://www.github.com/sitemap.xml" ) ,
220302 null ,
303+ null ,
221304 default ) , Times . Once ) ;
222305 }
223306
@@ -231,7 +314,7 @@ public async Task LoadSitemapAsync_NoSitemapDirective_PassModifiedDate()
231314" ;
232315 await using var stream = new MemoryStream ( Encoding . UTF8 . GetBytes ( file ) ) ;
233316
234- _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < CancellationToken > ( ) ) )
317+ _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < Func < Uri , bool > > ( ) , It . IsAny < CancellationToken > ( ) ) )
235318 . Returns ( Enumerable . Empty < UrlSetItem > ( ) . ToAsyncEnumerable ( ) ) ;
236319
237320 var modifiedDate = new DateTime ( 2023 , 01 , 01 ) ;
@@ -246,6 +329,36 @@ public async Task LoadSitemapAsync_NoSitemapDirective_PassModifiedDate()
246329 _robotsClientMock . Verify ( callTo => callTo . LoadSitemapsAsync (
247330 It . IsAny < Uri > ( ) ,
248331 modifiedDate ,
332+ null ,
333+ default ) , Times . Once ) ;
334+ }
335+
/// <summary>
/// Given a robots.txt that contains no Sitemap directive, the filter delegate supplied to
/// <c>LoadSitemapAsync</c> is still passed to the client's <c>LoadSitemapsAsync</c> exactly
/// once, together with a null modified date and the default cancellation token.
/// </summary>
[Fact]
public async Task LoadSitemapAsync_NoSitemapDirective_PassSitemapLocationFilter()
{
    // Arrange
    Func<Uri, bool> locationFilter = uri =>
    {
        var path = uri.AbsolutePath;
        return path == "/sitemap.xml" || path.Contains("product");
    };

    var robotsFile =
@"User-agent: *
Disallow: /
";
    await using var robotsStream = new MemoryStream(Encoding.UTF8.GetBytes(robotsFile));

    // Any client call yields an empty sitemap so the enumeration completes immediately.
    _robotsClientMock
        .Setup(client => client.LoadSitemapsAsync(
            It.IsAny<Uri>(),
            It.IsAny<DateTime?>(),
            It.IsAny<Func<Uri, bool>>(),
            It.IsAny<CancellationToken>()))
        .Returns(Enumerable.Empty<UrlSetItem>().ToAsyncEnumerable());

    var robotsTxt = await _parser.ReadFromStreamAsync(robotsStream);

    // Act
    _ = await robotsTxt.LoadSitemapAsync(sitemapLocationFilter: locationFilter).ToListAsync();

    // Assert
    robotsTxt.Should().NotBe(null);
    _robotsClientMock.Verify(
        client => client.LoadSitemapsAsync(
            It.IsAny<Uri>(),
            null,
            locationFilter,
            default),
        Times.Once);
}
251364
@@ -259,7 +372,7 @@ public async Task LoadSitemapAsync_NoSitemapDirective_PassCancellationToken()
259372" ;
260373 await using var stream = new MemoryStream ( Encoding . UTF8 . GetBytes ( file ) ) ;
261374
262- _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < CancellationToken > ( ) ) )
375+ _robotsClientMock . Setup ( callTo => callTo . LoadSitemapsAsync ( It . IsAny < Uri > ( ) , It . IsAny < DateTime ? > ( ) , It . IsAny < Func < Uri , bool > > ( ) , It . IsAny < CancellationToken > ( ) ) )
263376 . Returns ( Enumerable . Empty < UrlSetItem > ( ) . ToAsyncEnumerable ( ) ) ;
264377
265378 using var cancellationTokenSource = new CancellationTokenSource ( ) ;
@@ -275,6 +388,7 @@ public async Task LoadSitemapAsync_NoSitemapDirective_PassCancellationToken()
275388 _robotsClientMock . Verify ( callTo => callTo . LoadSitemapsAsync (
276389 It . IsAny < Uri > ( ) ,
277390 null ,
391+ null ,
278392 cancellationToken ) , Times . Once ) ;
279393 }
280394}
0 commit comments