@@ -11,19 +11,8 @@ use super::*;
11
11
use anyhow:: { anyhow, Result } ;
12
12
use regex:: Regex ;
13
13
14
- #[ derive( Debug , Clone ) ]
15
- pub struct NginxListingParser {
16
- metadata_regex : Regex ,
17
- }
18
-
19
- impl Default for NginxListingParser {
20
- fn default ( ) -> Self {
21
- Self {
22
- metadata_regex : Regex :: new ( r"(\d{2}-\w{3}-\d{4} \d{2}:\d{2})\s+([\d\.\-kMG]+)$" )
23
- . unwrap ( ) ,
24
- }
25
- }
26
- }
14
+ #[ derive( Debug , Clone , Default ) ]
15
+ pub struct NginxListingParser { }
27
16
28
17
impl Parser for NginxListingParser {
29
18
fn get_list ( & self , client : & reqwest:: blocking:: Client , url : & url:: Url ) -> Result < ListResult > {
@@ -34,11 +23,17 @@ impl Parser for NginxListingParser {
34
23
let document = Html :: parse_document ( & body) ;
35
24
let selector = Selector :: parse ( "a" ) . unwrap ( ) ;
36
25
let mut items = Vec :: new ( ) ;
26
+ let mut date_fmt = None ;
27
+ let mut date_regex = None ;
37
28
for element in document. select ( & selector) {
38
29
let href = match element. value ( ) . attr ( "href" ) {
39
30
Some ( href) => href,
40
31
None => continue ,
41
32
} ;
33
+ if href. starts_with ( '?' ) {
34
+ // An href starting with '?' is an Apache autoindex command, not a listing entry; skip it.
35
+ continue ;
36
+ }
42
37
// It's not proper to get filename by <a> text
43
38
// As when it is too long, this could happen:
44
39
// ceph-immutable-object-cache_17.2.6-pve1+3_amd64..> 03-May-2023 23:52 150048
@@ -55,6 +50,11 @@ impl Parser for NginxListingParser {
55
50
if name == ".." {
56
51
continue ;
57
52
}
53
+ // Extra check for Apache servers: skip the link whose text is "Parent Directory".
54
+ let inner = element. inner_html ( ) ;
55
+ if inner == "Parent Directory" {
56
+ continue ;
57
+ }
58
58
let type_ = if href. as_str ( ) . ends_with ( '/' ) {
59
59
FileType :: Directory
60
60
} else {
@@ -69,12 +69,25 @@ impl Parser for NginxListingParser {
69
69
. to_string ( ) ;
70
70
let metadata_raw = metadata_raw. trim ( ) ;
71
71
debug ! ( "{:?}" , metadata_raw) ;
72
- let metadata = self . metadata_regex . captures ( metadata_raw) . ok_or ( anyhow ! (
73
- "Get '{}' for metadata, is this a nginx page?" ,
74
- metadata_raw
75
- ) ) ?;
72
+ // Guess the date format (and build the matching regex) once, from the first metadata line seen.
73
+ if date_fmt. is_none ( ) {
74
+ let ( f, r) = guess_date_fmt ( metadata_raw) ;
75
+ date_fmt = Some ( f) ;
76
+ date_regex = Some ( Regex :: new ( & format ! ( r"({})\s+([\d\.\-kKMG]+)$" , r) ) ?) ;
77
+ debug ! ( "date_fmt: {:?} date_regex: {:?}" , date_fmt, date_regex)
78
+ }
79
+ let metadata = date_regex
80
+ . clone ( )
81
+ . unwrap ( )
82
+ . captures ( metadata_raw)
83
+ . ok_or ( anyhow ! (
84
+ "Get '{}' for {} ({}) metadata, is this a nginx page?" ,
85
+ metadata_raw,
86
+ name,
87
+ href
88
+ ) ) ?;
76
89
let date = metadata. get ( 1 ) . unwrap ( ) . as_str ( ) ;
77
- let date = NaiveDateTime :: parse_from_str ( date, "%d-%b-%Y %H:%M" ) ?;
90
+ let date = NaiveDateTime :: parse_from_str ( date, & date_fmt . clone ( ) . unwrap ( ) ) ?;
78
91
let size = metadata. get ( 2 ) . unwrap ( ) . as_str ( ) ;
79
92
debug ! ( "{} {} {:?} {} {:?}" , href, name, type_, date, size) ;
80
93
items. push ( ListItem :: new (
@@ -84,7 +97,11 @@ impl Parser for NginxListingParser {
84
97
{
85
98
if size == "-" {
86
99
None
87
- } else if size. contains ( 'k' ) || size. contains ( 'M' ) || size. contains ( 'G' ) {
100
+ } else if size. contains ( 'k' )
101
+ || size. contains ( 'K' )
102
+ || size. contains ( 'M' )
103
+ || size. contains ( 'G' )
104
+ {
88
105
let ( n_size, unit) = FileSize :: get_humanized ( size) ;
89
106
Some ( FileSize :: HumanizedBinary ( n_size, unit) )
90
107
} else {
@@ -101,8 +118,11 @@ impl Parser for NginxListingParser {
101
118
102
119
#[ cfg( test) ]
103
120
mod tests {
121
+ use test_log:: test;
104
122
use url:: Url ;
105
123
124
+ use crate :: listing:: SizeUnit ;
125
+
106
126
use super :: * ;
107
127
108
128
#[ test]
@@ -182,4 +202,38 @@ mod tests {
182
202
_ => unreachable ! ( ) ,
183
203
}
184
204
}
205
+
206
+ #[ test]
207
+ fn test_ghettoforge ( ) {
208
+ let client = reqwest:: blocking:: Client :: new ( ) ;
209
+ let items = NginxListingParser :: default ( )
210
+ . get_list (
211
+ & client,
212
+ & url:: Url :: parse ( "http://localhost:1921/ghettoforge" ) . unwrap ( ) ,
213
+ )
214
+ . unwrap ( ) ;
215
+ match items {
216
+ ListResult :: List ( items) => {
217
+ assert_eq ! ( items. len( ) , 8 ) ;
218
+ assert_eq ! ( items[ 0 ] . name, "RPM-GPG-KEY-gf.el7" ) ;
219
+ assert_eq ! ( items[ 0 ] . type_, FileType :: File ) ;
220
+ assert_eq ! (
221
+ items[ 0 ] . size,
222
+ Some ( FileSize :: HumanizedBinary ( 3.0 , SizeUnit :: K ) )
223
+ ) ;
224
+ assert_eq ! (
225
+ items[ 0 ] . mtime,
226
+ NaiveDateTime :: parse_from_str( "2014-12-30 02:53" , "%Y-%m-%d %H:%M" ) . unwrap( )
227
+ ) ;
228
+ assert_eq ! ( items[ 3 ] . name, "archive" ) ;
229
+ assert_eq ! ( items[ 3 ] . type_, FileType :: Directory ) ;
230
+ assert_eq ! ( items[ 3 ] . size, None ) ;
231
+ assert_eq ! (
232
+ items[ 3 ] . mtime,
233
+ NaiveDateTime :: parse_from_str( "2020-12-21 02:34" , "%Y-%m-%d %H:%M" ) . unwrap( )
234
+ ) ;
235
+ }
236
+ _ => unreachable ! ( ) ,
237
+ }
238
+ }
185
239
}
0 commit comments