-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfeedchecker.rb
More file actions
executable file
·168 lines (147 loc) · 4.62 KB
/
feedchecker.rb
File metadata and controls
executable file
·168 lines (147 loc) · 4.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/env ruby
# == Synopsis
#
# This is a simple, multi-threaded script which takes an opml file and
# checks all contained feeds for errors, redirects and so on.
# It is also able to partially detect orphaned feeds.
#
# == Examples
#
# feedchecker.rb -i feeds.opml
#
# Other examples:
# feedchecker.rb -t 60 -i input.opml
# feedchecker.rb -a 365 -t 60 -i input.opml
#
# == Usage
# feedchecker.rb [options] -i input.opml
#
# For help use: feedchecker.rb -h
#
# == Options
#
# ./feedchecker.rb --help
# This is a simple, script which takes an opml file and checks all contained feeds for
# errors.
#
# Usage:
#
# feedchecker.rb [options] -i <filename>
#
# where [options] are:
# --input, -i <s>: Input opml file
# --timeout, -t <i>: Timeout interval in seconds (default: 60)
# --age, -a <i>: Specify the minimum age in days (default: 365)
# --fetchparallel, -f <i>: Specify the amount of feeds to fetch parallel (default: 5)
# --version, -v: Print version and exit
# --help, -h: Show this message
#
# == Author
# Sven Pfleiderer
#
# == Copyright
# Copyright (c) 2009 Sven Pfleiderer. Licensed under GPL Version 2:
# http://www.gnu.org/licenses/gpl-2.0.html
require 'rubygems'
require 'net/http'
require 'net/https'
require 'uri'
require 'open-uri'
require 'timeout'
require 'rexml/document'
require 'rss/1.0'
require 'rss/2.0'
require 'date'
require 'trollop'
require 'peach'
# Reads feed URLs from an OPML file and reports the feeds that look broken:
# redirects, 403/404 responses, unreachable hosts, timeouts, feeds that
# cannot be parsed, and feeds whose first item is older than a threshold.
class Feedchecker
  SECONDS_PER_DAY = 60 * 60 * 24

  # options - Hash; the following keys are read by this class:
  #   :input         - path of the OPML file to read
  #   :timeout       - seconds allowed for each network operation
  #   :age           - a feed is reported when its first item is older
  #                    than this many days
  #   :fetchparallel - handed to pmap (from the peach gem); presumably the
  #                    number of feeds checked concurrently
  def initialize(options)
    @options = options
  end

  # Checks every feed found in the OPML file and prints one line per
  # problematic feed to stdout, sorted alphabetically. Healthy feeds
  # produce no output.
  def check_feeds
    responses = read_opml.pmap(@options[:fetchparallel]) do |feed|
      get_response(feed)
    end
    responses.compact.sort.each { |line| puts line }
  end

  private

  # Checks a single feed URL.
  #
  # Returns a report line (the URL followed by a description of the problem)
  # or nil when no problem was detected.
  def get_response(url)
    uri = parse_http_uri(url)
    return "#{url} is not a valid HTTP(S) URI" unless uri

    check = head_status(uri)
    # Only download and inspect the feed when the HEAD request gave no
    # reason to complain.
    check ||= check_age(url)
    "#{url}#{check}" if check
  end

  # Parses url and returns the URI object, or nil when the string is not a
  # syntactically valid http/https URI with a host.
  def parse_http_uri(url)
    uri = URI.parse(url)
    uri if uri.is_a?(URI::HTTP) && uri.host
  rescue URI::InvalidURIError
    nil
  end

  # Sends a HEAD request to uri and returns a problem description for
  # redirects, 403, 404 and connection-level failures. Returns nil for
  # every other response.
  def head_status(uri)
    Timeout.timeout(@options[:timeout]) do
      # TLS has to be switched on explicitly, otherwise https feeds are
      # spoken to in plain HTTP on port 443.
      Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.is_a?(URI::HTTPS)) do |http|
        response = http.head(uri.request_uri)
        case response
        when Net::HTTPRedirection then " Redirect ... new URI: #{response['location']}"
        when Net::HTTPForbidden then " Forbidden ... check URI"
        when Net::HTTPNotFound then " Not found ... check URI"
        end
      end
    end
  rescue Timeout::Error, Errno::ETIMEDOUT
    " Connection timed out"
  rescue SocketError
    " #{uri.host} not found"
  rescue Errno::ECONNRESET, Errno::EPIPE, Errno::ECONNREFUSED,
         Errno::EHOSTUNREACH, Errno::ENETUNREACH, EOFError
    " Connection to #{uri.host} failed!"
  rescue OpenSSL::SSL::SSLError
    " Secure connection to #{uri.host} failed!"
  rescue Net::HTTPBadResponse
    " #{uri.host} sends bad HTTP data"
  end

  # Downloads the feed and compares the date of its first item with the
  # current time (assumes the first item is the most recent one).
  #
  # Returns a problem description when the feed is older than
  # @options[:age] days, cannot be fetched or cannot be parsed; nil
  # otherwise.
  def check_age(url)
    date_now = Time.now
    # URI#read (open-uri) is used instead of Kernel#open so that a string
    # starting with "|" can never be executed as a command.
    content = Timeout.timeout(@options[:timeout]) { URI.parse(url).read }
    rss = RSS::Parser.parse(content, false)
    return nil unless rss

    feedage = (date_now - rss.items.first.date).to_i / SECONDS_PER_DAY
    " is out of date. Age: #{feedage} days without an update" if feedage > @options[:age]
  rescue NameError, TypeError, OpenURI::HTTPError, URI::InvalidURIError,
         Timeout::Error, SocketError, SystemCallError, OpenSSL::SSL::SSLError
    # NameError/TypeError cover feeds without items or without a date on
    # the first item; the remaining classes cover download failures.
    " age could not be checked"
  rescue RSS::NotWellFormedError
    " feed isn't well formed and couldn't be parsed"
  end

  # Returns the non-empty xmlUrl attribute values of all <outline> elements
  # in document, in document order.
  def parse_opml(document)
    urls = REXML::XPath.match(document, '//outline[@xmlUrl]').map do |outline|
      outline.attributes['xmlUrl']
    end
    urls.reject { |url| url.strip.empty? }
  end

  # Reads and parses the OPML file named by @options[:input].
  #
  # Returns the Array of feed URLs. When the file cannot be read or parsed
  # a message is printed and an empty Array is returned, so callers can
  # always iterate over the result.
  def read_opml
    path = @options[:input]

    begin
      opml_file = File.read(path)
    rescue StandardError
      puts "File #{path} not found!"
      return []
    end

    begin
      parse_opml(REXML::Document.new(opml_file))
    rescue StandardError
      puts "File #{path} could not be parsed!"
      []
    end
  end
end
# Command-line entry point: parse the options with Trollop, make sure the
# OPML input file exists, then check all feeds it lists.
options = Trollop::options do
  version "feedchecker.rb 0.4 (c) 2009 Sven Pfleiderer"
  banner <<-EOS
This is a simple, script which takes an opml file and checks all contained feeds for errors.
Usage:
feedchecker.rb [options] -i <filename>
where [options] are:
EOS
  opt :input, "Input opml file", :type => String
  opt :timeout, "Timeout interval in seconds", :default => 60
  opt :age, "Specify the minimum age in days", :default => 365
  opt :fetchparallel, "Specify the amount of feeds to fetch parallel", :default => 5
end

# Abort with a usage error when no input file was given or it does not exist.
input_path = options[:input]
if input_path.nil? || !File.exist?(input_path)
  Trollop::die "must specify an existing input file"
end

checker = Feedchecker.new(options)
checker.check_feeds