-
Notifications
You must be signed in to change notification settings - Fork 112
/
Copy pathimport.rb
143 lines (123 loc) · 4.05 KB
/
import.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
require 'bible_parser'
require 'bible_ref'
require 'bundler/setup'
require 'dotenv'
require 'mysql2'
require 'optparse'
require 'sequel'
# Load environment variables through Dotenv (DATABASE_URL is expected to come
# from there, "probably in .env" as the error message below puts it).
Dotenv.load

# Command-line options. :bibles_path has a default; the other keys are only
# present when the matching flag was passed.
@options = {
  bibles_path: './bibles'
}

OptionParser.new do |opts|
  opts.banner = 'Usage: ruby import.rb [options]'

  opts.on('-t', '--translation=NAME', 'Only import a single translation (e.g. eng-ylt.osis.xml)') do |name|
    @options[:translation] = name
  end

  # Double-quoted so the default path is really interpolated into the help
  # text (a single-quoted string would print the #{...} expression verbatim).
  opts.on('--bibles-path=PATH',
          "Specify custom path for open-bibles (default: #{@options[:bibles_path].inspect})") do |path|
    @options[:bibles_path] = path
  end

  opts.on('--overwrite', 'Overwrite any existing data') do
    @options[:overwrite] = true
  end

  opts.on('--drop-tables', 'Drop all tables first (and recreate them)') do
    @options[:drop_tables] = true
  end

  opts.on('-h', '--help') do
    puts opts
    exit
  end
end.parse!

# Nothing can be imported without a database to write to.
unless ENV['DATABASE_URL']
  puts 'Must set the DATABASE_URL environment variable (probably in .env)'
  exit 1
end

# Rewrite a leading mysql:// scheme to mysql2://. The pattern is anchored so
# only the URL scheme itself is ever rewritten.
DB = Sequel.connect(ENV['DATABASE_URL'].sub(%r{\Amysql://}, 'mysql2://'), encoding: 'utf8mb4')
# Loads the verses of one bible file into the verses table.
class Importer
  # Parses the file at +path+ with BibleParser and inserts one row per verse
  # into DB[:verses], tagging every row with +translation_id+. Progress is
  # printed to stdout while the import runs.
  #
  # @param path [String] location of the bible file to read
  # @param translation_id [Object] value stored in each row's :translation_id
  #   (the caller passes the id returned by inserting the translation row)
  # @return [nil]
  def import(path, translation_id)
    puts ' importing...'
    # Block form of File.open: the handle is closed as soon as parsing is
    # finished, even if the parser or an insert raises.
    File.open(path) do |file|
      BibleParser.new(file).each_verse do |verse|
        row = verse.to_h
        # Rename the parser's keys to the column names used by the verses table.
        row[:book] = row.delete(:book_title)
        row[:chapter] = row.delete(:chapter_num)
        row[:verse] = row.delete(:num)
        row[:translation_id] = translation_id
        # The trailing \r keeps the progress indicator on a single terminal line.
        print " #{translation_id} - #{row[:book]} #{row[:chapter]}:#{row[:verse]} \r"
        DB[:verses].insert(row)
      end
    end
    puts ' done '
  end
end
# With --drop-tables, remove both tables so they are recreated from scratch
# below. drop_table? skips tables that do not exist, so the flag also works
# against an empty database.
DB.drop_table?(:translations, :verses) if @options[:drop_tables]

# One row per bible translation; created only if the table is missing.
DB.create_table? :translations, charset: 'utf8mb4' do
  primary_key :id
  String :identifier
  String :name
  String :language
  String :language_code
  String :license
end

# One row per verse, tied to its translation through translation_id; created
# only if the table is missing.
# Integer is used for the numeric columns rather than the legacy Fixnum type
# name (Ruby folded Fixnum into Integer).
DB.create_table? :verses, charset: 'utf8mb4' do
  primary_key :id
  Integer :book_num
  String :book_id
  String :book
  Integer :chapter
  Integer :verse
  String :text, text: true
  Integer :translation_id
end
importer = Importer.new

# The list of bible files lives in a markdown table inside the README.md of
# the bibles directory: collect every line that looks like a table row.
readme_path = "#{@options[:bibles_path]}/README.md"
table = File.read(readme_path).scan(/^ *\|.+\| *$/)

# The first row carries the column headings; the row right after it is
# thrown away (presumably the markdown |---| separator).
headings = table.shift.split(/\s*\|\s*/)
table.shift # junk

# Turn each remaining row into a hash keyed by the lower-cased heading.
translations = table.map do |row|
  cells = row.split(/\s*\|\s*/)
  headings.zip(cells).each_with_object({}) do |(heading, cell), attrs|
    # splitting on the leading pipe yields an empty first heading; skip it
    next if heading.empty?

    attrs[heading.downcase] = cell
  end
end
# Import every translation listed in the README table, one file at a time.
translations.each do |translation|
  path = "#{@options[:bibles_path]}/#{translation['filename']}"

  # When --translation was given, only the file with that exact name is imported.
  only = @options[:translation]
  next if only && path.split('/').last != only

  puts path

  # The part of the filename before the first dot holds the language code and,
  # in the two-part form, the identifier as well.
  lang_code_and_id = translation.delete('filename').split('.').first
  lang_parts = lang_code_and_id.split('-')
  case lang_parts.size
  when 3
    # Three parts: take the language code from the filename and the
    # identifier from the table's abbrev column.
    translation['language_code'] = lang_parts.first
    translation['identifier'] = translation['abbrev'].downcase
    raise 'bad abbrev' if translation['identifier'].to_s.strip.empty?
  when 2
    translation['language_code'], translation['identifier'] = lang_parts
  else
    raise "error with language and id for lang parts: #{lang_parts.inspect}"
  end
  translation['language_code'] = 'zh-tw' if translation['language_code'] == 'chi'

  # Drop the table columns that are not inserted, and rename version to name.
  translation.delete('format')
  translation.delete('abbrev')
  translation['name'] = translation.delete('version')

  # Skip translations whose language BibleRef rejects with a KeyError.
  language = translation['language_code']
  begin
    BibleRef::Reference.new('John 3:16', language: language)
  rescue KeyError
    puts " language #{language} not supported"
    next
  end

  existing_row = DB['select id from translations where identifier = ?', translation['identifier']].first
  existing_id = existing_row&.fetch(:id, nil)

  # An already-imported translation is only replaced when --overwrite was passed.
  if existing_id && !@options[:overwrite]
    puts ' skipping existing translation (pass --overwrite)'
    next
  end

  if existing_id
    DB[:verses].where(translation_id: existing_id).delete
    DB[:translations].where(identifier: translation['identifier']).delete
  end

  new_id = DB[:translations].insert(translation)
  importer.import(path, new_id)
end