Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion lib/calais/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,12 @@ def check_params
# Sends +post_fields+ as a form-encoded POST to +url+ and returns the
# response body as a String.
#
# The Net::HTTP::Post object is memoized in @request and reused on later
# calls; only its form data is refreshed. Exactly one HTTP request is issued
# per call.
#
# If the response is still marked with a gzip Content-Encoding, the body is
# inflated before being returned; otherwise the body is returned as received.
def do_request(post_fields)
  @request ||= Net::HTTP::Post.new(url.path)
  @request.set_form_data(post_fields)

  resp = Net::HTTP.new(url.host, url.port).start { |http| http.request(@request) }
  body = resp.body

  # Header names are case-insensitive via Net::HTTPHeader#[]; compare the
  # value case-insensitively as well so "GZIP" or " gzip" is still recognized.
  gzipped = resp['Content-Encoding'].to_s.strip.casecmp('gzip').zero?

  # A missing or empty body cannot be inflated; hand it back untouched.
  return body if !gzipped || body.nil? || body.empty?

  # The block form closes the reader once the payload has been read.
  Zlib::GzipReader.wrap(StringIO.new(body)) { |gz| gz.read }
end

def calais_endpoint
Expand Down
36 changes: 20 additions & 16 deletions lib/calais/response.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def initialize(rdf_string)
@relevances = {} # key = String hash, val = Float relevance
@categories = []
@socialtags = []

extract_data
end

Expand All @@ -48,7 +48,7 @@ class Category
# Plain value holder for one social tag taken from a response: a name and
# an importance, both readable and writable.
class SocialTag
  attr_accessor :name
  attr_accessor :importance
end

class Instance
attr_accessor :prefix, :exact, :suffix, :offset, :length

Expand Down Expand Up @@ -85,11 +85,12 @@ def extract_data

if doc.root.xpath("/Error[1]").first
raise Calais::Error, doc.root.xpath("/Error/Exception").first.content
end
end

doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each do |node|
@language = node['language']
@submission_date = DateTime.parse node['submissionDate']
@language = node['c:language']

@submission_date = DateTime.parse node['c:submissionDate']

attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))

Expand All @@ -100,12 +101,12 @@ def extract_data
end

doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each do |node|
@request_id = node['calaisRequestID']
@request_id = node['c:calaisRequestID']

attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))

@doc_title = attributes.delete('docTitle')
@doc_date = Date.parse(attributes.delete('docDate'))
@doc_date = Date.parse(attributes.delete('docDate'))

node.remove
end
Expand All @@ -114,9 +115,9 @@ def extract_data
tag = SocialTag.new
tag.name = node.xpath("c:name[1]").first.content
tag.importance = node.xpath("c:importance[1]").first.content.to_i

node.remove if node.xpath("c:categoryName[1]").first.nil?

tag
end

Expand All @@ -131,18 +132,19 @@ def extract_data
end

@relevances = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node|
subject_hash = node.xpath("c:subject[1]").first[:resource].split('/')[-1]
subject_hash = node.xpath("c:subject[1]").first['rdf:resource'].split('/')[-1]
acc[subject_hash] = node.xpath("c:relevance[1]").first.content.to_f

node.remove
acc
end

@entities = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node|
extracted_hash = node['about'].split('/')[-1] rescue nil
extracted_hash = node['rdf:about'].split('/')[-1] rescue nil

entity = Entity.new
entity.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes)

entity.type = extract_type(node)
entity.attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))

Expand All @@ -154,7 +156,7 @@ def extract_data
end

@relations = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node|
extracted_hash = node['about'].split('/')[-1] rescue nil
extracted_hash = node['rdf:about'].split('/')[-1] rescue nil

relation = Relation.new
relation.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes)
Expand All @@ -169,11 +171,13 @@ def extract_data
@geographies = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node|
attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))


geography = Geography.new
geography.name = attributes.delete('name')
geography.calais_hash = attributes.delete('subject')
geography.calais_hash = node.xpath('c:subject').first['rdf:resource'].split('/')[-1] rescue nil
geography.attributes = attributes
geography.relevance = extract_relevance(geography.calais_hash.value)

geography.relevance = extract_relevance(geography.calais_hash )

node.remove
geography
Expand All @@ -187,7 +191,7 @@ def extract_data

def extract_instances(doc, hash)
doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..").select do |instance_node|
instance_node.xpath("c:subject[1]").first[:resource].split("/")[-1] == hash
instance_node.xpath("c:subject[1]").first['rdf:resource'].split("/")[-1] == hash
end.map do |instance_node|
instance = Instance.from_node(instance_node)
instance_node.remove
Expand All @@ -197,7 +201,7 @@ def extract_instances(doc, hash)
end

# Returns the last path segment of the resource URI carried by the node's
# first rdf:type child, read from its namespace-qualified 'rdf:resource'
# attribute.
#
# Returns nil when the type cannot be determined (no rdf:type child, no
# 'rdf:resource' attribute, or any other StandardError raised while looking
# it up). This is a deliberate best-effort lookup, so the rescue is kept.
def extract_type(node)
  type_node = node.xpath("*[name()='rdf:type']").first
  type_node['rdf:resource'].split('/').last
rescue StandardError
  nil
end
Expand Down