Skip to content

Commit b6b8095

Browse files
committed
fix github projects recipe
1 parent 2292694 commit b6b8095

1 file changed

Lines changed: 24 additions & 20 deletions

File tree

recipes/github_projects.rb

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
11
# encoding: utf-8
22

33
# Instructions:
4-
# The README pages for this recipe are scraped from https://github.com/danchoi/kindlefodder/wiki/Github-READMEs-and-pages-for-the-GitHub-project-docs-recipe
5-
# You can edit that wiki page
4+
# The README pages for this recipe are scraped from
5+
# https://github.com/danchoi/kindlefodder/wiki/Github-READMEs-and-pages-for-the-GitHub-project-docs-recipe
6+
# You can edit that wiki page to add more popular repositories that you would like to scrape
7+
# and convert to MOBI format.
68

79
require 'shellwords'

require 'kindlefodder'
810

911
class GithubProjects < Kindlefodder
10-
11-
WIKIPAGE = "https://github.com/danchoi/kindlefodder/wiki/Github-READMEs-for-the-github_projects.rb-recipe"
12+
WIKIPAGE = 'https://github.com/danchoi/kindlefodder/wiki/Github-READMEs-for-the-github_projects.rb-recipe'
1213

1314
def get_source_files
14-
@urls = Nokogiri::HTML(`curl -Ls "#{WIKIPAGE}"`).search("#wiki-body h2").inject({}) {|m, h2|
15-
m[h2.inner_text] = h2.xpath("./following-sibling::ul[1]/li").map {|li| li.inner_text}
16-
m
17-
}
15+
@urls = Nokogiri::HTML(`curl -Ls "#{WIKIPAGE}"`).
16+
search("#wiki-body h2").
17+
inject({}) do |m, h2|
18+
m[h2.inner_text] = h2.xpath("./following-sibling::ul[1]/li").map { |li| li.inner_text }
19+
m
20+
end
1821
puts @urls.to_yaml
1922
sections = extract_sections
2023
puts sections.inspect
@@ -31,33 +34,34 @@ def document
3134
end
3235

3336
# Downloads every page listed in @urls and groups the saved articles by section.
#
# @urls maps a section title to an array of page URLs; it is filled in by
# get_source_files from the headings and list items of the wiki page.
#
# Pages that cannot be processed (no <title>, or neither a #readme nor a
# #wiki-wrapper element) are reported on stderr and left out of the result
# instead of aborting the whole run.
#
# @return [Array<Hash>] one hash per section, shaped like
#   { title: String, articles: [{ title: String, path: String }, ...] }
def extract_sections
  @urls.map do |section_title, urls|
    {
      title: section_title,
      articles: urls.map { |url| fetch_github_article(url) }.compact
    }
  end
end

# Fetches a single page with curl, extracts its README (or wiki body) and
# saves it through save_article_and_return_path.
#
# @param url [String] page URL as scraped from the wiki list item
# @return [Hash, nil] { title:, path: } for the saved article, or nil when the
#   page has no usable title or content
def fetch_github_article(url)
  # The URLs come from a publicly editable wiki page, so they must never reach
  # the shell unescaped: a plain "&" in a query string would background curl,
  # and anything after ";" would be executed as a separate command.
  html = run_shell_command("curl -s #{Shellwords.escape(url.strip)}")
  doc = Nokogiri::HTML(html.force_encoding(Encoding::UTF_8))

  title_node = doc.at('title')
  readme = doc.at('#readme') || doc.at('#wiki-wrapper')
  # The article title is the first run of non-space characters of the page
  # title (presumably the "user/repo" part of a GitHub page title -- confirm).
  article_title = title_node && title_node.inner_text[/[^ ]+/]

  unless article_title && readme
    $stderr.puts "skipping #{url}: no title or README/wiki content found"
    return nil
  end

  $stderr.puts article_title
  {
    title: article_title,
    path: save_article_and_return_path(readme, article_title)
  }
end
5155

52-
# Deliberate no-op: leaves the document untouched and returns nil.
#
# Stubbed out because the HTML fix-up causes encoding issues with UTF
# characters like em-dash (investigate this later).
# NOTE(review): presumably this replaces a fix-up hook provided by
# Kindlefodder -- confirm against the base class.
#
# @param doc [Object] the parsed document; intentionally unused
# @return [nil]
def fixup_html!(doc)
end
5660

5761
# Writes the inner HTML of +readme+ to a file under output_dir and returns the
# path of that file relative to output_dir.
#
# The file name is +title+ with every non-word character replaced by "-", so
# "user/repo" is stored as "articles/user-repo". The "articles" directory must
# already exist inside output_dir.
#
# @param readme [#inner_html] node whose markup becomes the article body
# @param title [String] article title used to derive the file name
# @return [String] relative path of the written file, e.g. "articles/user-repo"
def save_article_and_return_path(readme, title)
  path = "articles/#{title.gsub(/\W/, '-')}"
  content = readme.inner_html
  File.open("#{output_dir}/#{path}", 'w') { |f| f.puts(content) }
  path
end
6367
end

0 commit comments

Comments
 (0)