diff --git a/.gitignore b/.gitignore index f4d2ab3b2c..2fa908009b 100644 --- a/.gitignore +++ b/.gitignore @@ -166,3 +166,5 @@ storage/ /config/credentials/stage.yaml.enc /config/credentials/production.key /config/credentials/production.yaml.enc +.nvmrc + diff --git a/app/services/external_apis/ror_service.rb b/app/services/external_apis/ror_service.rb index 4aa4b159b6..44fa3fec7e 100644 --- a/app/services/external_apis/ror_service.rb +++ b/app/services/external_apis/ror_service.rb @@ -66,7 +66,7 @@ def fetch(force: false) if old_checksum_val == metadata[:checksum] log_message(method: method, message: 'There is no new ROR file to process.') else - download_file = download_file = metadata['key'] + download_file = metadata['key'] download_url = metadata.fetch('links', {}).fetch('download', metadata.fetch('links', {})['self']) log_message(method: method, message: "New ROR file detected - checksum #{metadata[:checksum]}") log_message(method: method, message: "Downloading #{download_file}") @@ -174,7 +174,7 @@ def process_ror_file(zip_file:, file:) log_message( method: method, - message: "Unable to process record for: '#{hash&.fetch('name', 'unknown')}'", + message: "Unable to process record for: '#{hash.fetch('names', []).first&.fetch('value', 'unknown')}'", info: false ) end @@ -204,14 +204,16 @@ def process_ror_record(record:, time:) registry_org = RegistryOrg.find_or_create_by(ror_id: record['id']) registry_org.name = safe_string(value: org_name(item: record)) - registry_org.acronyms = record['acronyms'] - registry_org.aliases = record['aliases'] - registry_org.country = record['country'] + registry_org.acronyms = extract_names(item: record, type: 'acronym') + registry_org.aliases = extract_names(item: record, type: 'alias') + registry_org.country = extract_country(item: record) registry_org.types = record['types'] registry_org.language = org_language(item: record) registry_org.file_timestamp = time.strftime('%Y-%m-%d %H:%M:%S') registry_org.fundref_id = fundref_id(item: record) - registry_org.home_page = safe_string(value: record.fetch('links', []).first) + + website = record.fetch('links', []).find { |l| l['type'] == 'website' } + registry_org.home_page = safe_string(value: website ? website['value'] : nil) # Attempt to find a matching Org record registry_org.org_id = check_for_org_association(registry_org: registry_org) @@ -250,15 +252,26 @@ def check_for_org_association(registry_org:) # "Example College (example.edu)" # "Example College (Brazil)" def org_name(item:) - return '' unless item.present? && item['name'].present? + return '' unless item.present? && item['names'].present? + + # Find ror_display name + name_obj = item['names'].find { |n| n['types']&.include?('ror_display') } + name = name_obj ? name_obj['value'] : item['names'].first['value'] + + return '' if name.blank? + + country = extract_country(item: item)&.fetch('country_name', '') + + # Try to get the domain from the 'domains' array first + website = item.fetch('domains', []).first + # Fallback to extracting it from the website link + website = org_website(item: item) if website.blank? - country = item.fetch('country', {}).fetch('country_name', '') - website = org_website(item: item) # If no website or country then just return the name - return item['name'] unless website.present? || country.present? + return name unless website.present? || country.present? # Otherwise return the contextualized name - "#{item['name']} (#{website || country})" + "#{name} (#{website || country})" end # Extracts the org's ISO639 if available @@ -266,38 +279,58 @@ def org_language(item:) dflt = I18n.default_locale || 'en' return dflt if item.blank? - country = item.fetch('country', {}).fetch('country_code', '') - labels = case country - when 'US' - [{ iso639: 'en' }] - else - item.fetch('labels', [{ iso639: dflt }]) - end - labels.first&.fetch('iso639', I18n.default_locale) || dflt + # Try to get language from ror_display name + name_obj = item.fetch('names', []).find { |n| n['types']&.include?('ror_display') } + return name_obj['lang'] if name_obj.present? && name_obj['lang'].present? + + dflt end # Extracts the website domain from the item def org_website(item:) return nil unless item.present? && item.fetch('links', [])&.any? - return nil if item['links'].first.blank? + + website_obj = item['links'].find { |l| l['type'] == 'website' } + return nil unless website_obj.present? && website_obj['value'].present? # A website was found, so extract just the domain without the www domain_regex = %r{^(?:http://|www\.|https://)([^/]+)} - website = item['links'].first.scan(domain_regex).last.first - website.gsub('www.', '') + website = website_obj['value'].scan(domain_regex).last&.first + website&.gsub('www.', '') end # Extracts the FundRef Id if available def fundref_id(item:) return '' unless item.present? && item['external_ids'].present? - return '' unless item['external_ids'].fetch('FundRef', {}).any? + + fundref = item['external_ids'].find { |id| id['type'] == 'fundref' } + return '' unless fundref.present? + + return fundref['preferred'] if fundref['preferred'].present? + + fundref.fetch('all', []).first + end - # If a preferred Id was specified then use it - ret = item['external_ids'].fetch('FundRef', {}).fetch('preferred', '') - return ret if ret.present? + # Helper to extract names by type + def extract_names(item:, type:) + return [] unless item.present? && item['names'].present? + + item['names'].select { |n| n['types']&.include?(type) }.map { |n| n['value'] } + end - # Otherwise take the first one listed - item['external_ids'].fetch('FundRef', {}).fetch('all', []).first + # Helper to extract country + def extract_country(item:) + return nil unless item.present? && item['locations'].present? + + # Assuming we take the first location + loc = item['locations'].first + return nil unless loc.present? && loc['geonames_details'].present? + + details = loc['geonames_details'] + { + 'country_name' => details['country_name'], + 'country_code' => details['country_code'] + } end end end diff --git a/spec/services/external_apis/ror_service_spec.rb b/spec/services/external_apis/ror_service_spec.rb index 6f32f33dd6..3dd309d388 100644 --- a/spec/services/external_apis/ror_service_spec.rb +++ b/spec/services/external_apis/ror_service_spec.rb @@ -42,7 +42,7 @@ xit 'returns an empty array' do expect(described_class.search(term: @term)).to eql([]) end - xit 'logs the response as an error' do + it 'logs the response as an error' do described_class.expects(:handle_http_failure).at_least(1) described_class.search(term: @term) end @@ -70,28 +70,36 @@ items: [ { id: 'https://ror.org/1234567890', - name: 'Example University', + names: [ + { types: ['ror_display'], value: 'Example University' }, + { types: ['alias'], value: 'Example' }, + { types: ['acronym'], value: 'EU' } + ], types: ['Education'], - links: ['http://example.edu/'], - aliases: ['Example'], - acronyms: ['EU'], + links: [{ type: 'website', value: 'http://example.edu/' }], status: 'active', - country: { country_name: 'United States', country_code: 'US' }, - external_ids: { - GRID: { preferred: 'grid.12345.1', all: 'grid.12345.1' } - } + locations: [ + { geonames_details: { country_name: 'United States', country_code: 'US' } } + ], + external_ids: [ + { type: 'grid', preferred: 'grid.12345.1', all: ['grid.12345.1'] } + ] }, { id: 'https://ror.org/0987654321', - name: 'Universidade de Example', + names: [ + { types: ['ror_display'], value: 'Universidade de Example' }, + { types: ['alias'], value: 'Example' }, + { types: ['acronym'], value: 'EU' } + ], types: ['Education'], links: [], - aliases: ['Example'], - acronyms: ['EU'], status: 'active', - country: { country_name: 'Mexico', country_code: 'MX' }, - external_ids: { - GRID: { preferred: 'grid.98765.8', all: 'grid.98765.8' } - } + locations: [ + { geonames_details: { country_name: 'Mexico', country_code: 'MX' } } + ], + external_ids: [ + { type: 'grid', preferred: 'grid.98765.8', all: ['grid.98765.8'] } + ] } ] } @@ -132,8 +140,8 @@ time_taken: 5, items: [{ id: Faker::Internet.url, - name: Faker::Lorem.word, - country: { country_name: Faker::Lorem.word } + names: [{ types: ['ror_display'], value: Faker::Lorem.word }], + locations: [{ geonames_details: { country_name: Faker::Lorem.word } }] }] } @term = Faker::Lorem.word @@ -206,8 +214,8 @@ items = Array.new(4).map do { id: Faker::Internet.unique.url, - name: Faker::Lorem.word, - country: { country_name: Faker::Lorem.word } + names: [{ types: ['ror_display'], value: Faker::Lorem.word }], + locations: [{ geonames_details: { country_name: Faker::Lorem.word } }] } end results1 = { number_of_results: 4, items: items } @@ -225,8 +233,8 @@ items = Array.new(7).map do { id: Faker::Internet.unique.url, - name: Faker::Lorem.word, - country: { country_name: Faker::Lorem.word } + names: [{ types: ['ror_display'], value: Faker::Lorem.word }], + locations: [{ geonames_details: { country_name: Faker::Lorem.word } }] } end results1 = { number_of_results: 7, items: items[0..4] } @@ -247,8 +255,8 @@ items = Array.new(12).map do { id: Faker::Internet.unique.url, - name: Faker::Lorem.word, - country: { country_name: Faker::Lorem.word } + names: [{ types: ['ror_display'], value: Faker::Lorem.word }], + locations: [{ geonames_details: { country_name: Faker::Lorem.word } }] } end results1 = { number_of_results: 12, items: items[0..4] } @@ -273,17 +281,17 @@ end xit 'ignores items with no name or id' do json = { items: [ - { id: Faker::Internet.url, name: Faker::Lorem.word }, + { id: Faker::Internet.url, names: [{ types: ['ror_display'], value: Faker::Lorem.word }] }, { id: Faker::Internet.url }, - { name: Faker::Lorem.word } + { names: [{ types: ['ror_display'], value: Faker::Lorem.word }] } ] }.to_json items = described_class.send(:parse_results, json: JSON.parse(json)) expect(items.length).to eql(1) end xit 'returns the correct number of results' do json = { items: [ - { id: Faker::Internet.url, name: Faker::Lorem.word }, - { id: Faker::Internet.url, name: Faker::Lorem.word } + { id: Faker::Internet.url, names: [{ types: ['ror_display'], value: Faker::Lorem.word }] }, + { id: Faker::Internet.url, names: [{ types: ['ror_display'], value: Faker::Lorem.word }] } ] }.to_json items = described_class.send(:parse_results, json: JSON.parse(json)) expect(items.length).to eql(2) @@ -292,31 +300,31 @@ describe '#org_name' do xit 'returns nil if there is no name' do - json = { country: { country_name: 'Nowhere' } }.to_json + json = { locations: [{ geonames_details: { country_name: 'Nowhere' } }] }.to_json expect(described_class.send(:org_name, item: JSON.parse(json))).to eql('') end xit 'properly appends the website if available' do json = { - name: 'Example College', - links: ['https://example.edu'], - country: { country_name: 'Nowhere' } + names: [{ types: ['ror_display'], value: 'Example College' }], + links: [{ type: 'website', value: 'https://example.edu' }], + locations: [{ geonames_details: { country_name: 'Nowhere' } }] }.to_json expected = 'Example College (example.edu)' expect(described_class.send(:org_name, item: JSON.parse(json))).to eql(expected) end - xit 'properly appends the country if available and no website is available' do + it 'properly appends the country if available and no website is available' do json = { - name: 'Example College', - country: { country_name: 'Nowhere' } + names: [{ types: ['ror_display'], value: 'Example College' }], + locations: [{ geonames_details: { country_name: 'Nowhere' } }] }.to_json expected = 'Example College (Nowhere)' expect(described_class.send(:org_name, item: JSON.parse(json))).to eql(expected) end xit 'properly handles an item with no website or country' do json = { - name: 'Example College', + names: [{ types: ['ror_display'], value: 'Example College' }], links: [], - country: {} + locations: [] }.to_json expected = 'Example College' expect(described_class.send(:org_name, item: JSON.parse(json))).to eql(expected) @@ -332,35 +340,35 @@ expect(described_class.send(:org_website, item: nil)).to eql(nil) end xit 'returns the domain only' do - item = JSON.parse({ links: ['https://example.org/path?a=b'] }.to_json) + item = JSON.parse({ links: [{ type: 'website', value: 'https://example.org/path?a=b' }] }.to_json) expect(described_class.send(:org_website, item: item)).to eql('example.org') end xit 'removes the www prefix' do - item = JSON.parse({ links: ['www.example.org'] }.to_json) + item = JSON.parse({ links: [{ type: 'website', value: 'www.example.org' }] }.to_json) expect(described_class.send(:org_website, item: item)).to eql('example.org') end end describe '#fundref_id' do before(:each) do - @hash = { external_ids: {} } + @hash = { external_ids: [] } end xit 'returns a blank if no external_ids are present' do json = JSON.parse(@hash.to_json) expect(described_class.send(:fundref_id, item: json)).to eql('') end xit 'returns a blank if no FundRef ids are present' do - @hash['external_ids'] = { FundRef: {} } + @hash['external_ids'] = [{ type: 'grid', preferred: '1', all: %w[2 1] }] json = JSON.parse(@hash.to_json) expect(described_class.send(:fundref_id, item: json)).to eql('') end xit 'returns the preferred id when specified' do - @hash['external_ids'] = { FundRef: { preferred: '1', all: %w[2 1] } } + @hash['external_ids'] = [{ type: 'fundref', preferred: '1', all: %w[2 1] }] json = JSON.parse(@hash.to_json) expect(described_class.send(:fundref_id, item: json)).to eql('1') end xit 'returns the firstid if no preferred is specified' do - @hash['external_ids'] = { FundRef: { preferred: nil, all: %w[2 1] } } + @hash['external_ids'] = [{ type: 'fundref', preferred: nil, all: %w[2 1] }] json = JSON.parse(@hash.to_json) expect(described_class.send(:fundref_id, item: json)).to eql('2') end diff --git a/spec/support/helpers/webmocks.rb b/spec/support/helpers/webmocks.rb index 0e13645230..de93a55f19 100644 --- a/spec/support/helpers/webmocks.rb +++ b/spec/support/helpers/webmocks.rb @@ -49,12 +49,12 @@ def mocked_ror_response 10.times.each do body[:items] << { id: Faker::Internet.url(host: 'ror.org'), - name: Faker::Company.unique.name, - links: [[Faker::Internet.url, nil].sample], - country: { country_name: Faker::Books::Dune.planet }, - external_ids: { - FundRef: { preferred: nil, all: [Faker::Number.number(digits: 6)] } - } + names: [{ types: ['ror_display'], value: Faker::Company.unique.name }], + links: [{ type: 'website', value: Faker::Internet.url }], + locations: [{ geonames_details: { country_name: Faker::Books::Dune.planet } }], + external_ids: [ + { type: 'fundref', preferred: nil, all: [Faker::Number.number(digits: 6)] } + ] } end body.to_json