Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,11 @@ group :test do
gem 'm', '~> 1.5.0'
gem 'minitest'
gem 'minitest-ci', '~> 3.4.0'
gem "minitest-rails", "~> 7.0"
gem 'minitest-reporters'
gem 'rails-controller-testing'
gem 'simplecov', require: false
gem 'webmock', '~> 3.23'
end

gem 'net-ftp'
Expand Down
17 changes: 17 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ GEM
dry-validation (~> 1.0, >= 1.0.0)
connection_pool (2.4.1)
content_disposition (1.0.0)
crack (1.0.0)
bigdecimal
rexml
crass (1.0.6)
csv (3.3.0)
database_cleaner (2.0.2)
Expand Down Expand Up @@ -526,6 +529,9 @@ GEM
minitest (5.25.1)
minitest-ci (3.4.0)
minitest (>= 5.0.6)
minitest-rails (7.0.1)
minitest (~> 5.10)
railties (~> 7.0.0)
minitest-reporters (1.7.1)
ansi
builder
Expand Down Expand Up @@ -617,6 +623,10 @@ GEM
activesupport (= 7.0.8.4)
bundler (>= 1.15.0)
railties (= 7.0.8.4)
rails-controller-testing (1.0.5)
actionpack (>= 5.0.1.rc1)
actionview (>= 5.0.1.rc1)
activesupport (>= 5.0.1.rc1)
rails-dom-testing (2.2.0)
activesupport (>= 5.0.0)
minitest
Expand Down Expand Up @@ -815,6 +825,10 @@ GEM
activemodel (>= 6.0.0)
bindex (>= 0.4.0)
railties (>= 6.0.0)
webmock (3.23.1)
addressable (>= 2.8.0)
crack (>= 0.3.2)
hashdiff (>= 0.4.0, < 2.0.0)
webpacker (5.4.4)
activesupport (>= 5.2)
rack-proxy (>= 0.6.1)
Expand Down Expand Up @@ -886,6 +900,7 @@ DEPENDENCIES
mini_magick (~> 4.9.4)
minitest
minitest-ci (~> 3.4.0)
minitest-rails (~> 7.0)
minitest-reporters
net-ftp
noticed
Expand All @@ -897,6 +912,7 @@ DEPENDENCIES
rack-cors
rack-mini-profiler (~> 2.3.1)
rails (~> 7.0.0)
rails-controller-testing
rexml
rsolr (>= 1.0)
rspec-rails (~> 3.5)
Expand All @@ -919,6 +935,7 @@ DEPENDENCIES
twitter-typeahead-rails (= 0.11.1.pre.corejavascript)
vite_rails (~> 3.0)
web-console
webmock (~> 3.23)
webpacker (~> 5.x)
whenever (~> 1.0.0)

Expand Down
7 changes: 3 additions & 4 deletions app/controllers/application_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@ def allow_geoblacklight_params
blacklight_config.search_state_fields.append(Settings.GBL_PARAMS)
end

protected

def not_found
raise ActionController::RoutingError.new('Not Found')
# Pointless Feedback
# NOTE(review): presumably the hook the Pointless Feedback engine calls to
# decide where to send the user after a message is created — confirm.
#
# @return [Object] whatever main_app.try(:root_path) yields, or '/' when that
#   value is nil or false.
def after_message_create_path
  root = main_app.try(:root_path)
  root || '/'
end
end
6 changes: 3 additions & 3 deletions app/controllers/errors_controller.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
class ErrorsController < ApplicationController
def not_found
render status: 404, layout: 'blacklight', template: 'errors/not_found.html.erb'
render 'errors/not_found', status: :not_found
end

def internal_server_error
render status: 500, layout: 'blacklight', template: 'errors/internal_server_error.html.erb'
render 'errors/internal_server_error', status: :internal_server_error
end
end
end
196 changes: 109 additions & 87 deletions app/services/uri_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,59 +3,15 @@
require "net/http"
require "net/ftp"

# Base class for the outcome of checking a single URI. The more specific
# outcomes in this file (Good, Redirect, Error) inherit from it.
class Result
  # The URI string this result refers to; readable and writable.
  attr_reader :uri_string
  attr_writer :uri_string

  # Builds a result from a parameter hash.
  #
  # @param params [Hash] parameters; the value stored under :uri_string is kept
  #   (nil when the key is absent).
  def initialize(params)
    @uri_string = params[:uri_string]
  end
end

# A Good result: the URL is valid. Adds no state or behavior beyond Result.
class Good < Result; end

# A Redirect result: the checked URL redirected to another URL.
class Redirect < Result
  # good: the value supplied under :good.
  #   NOTE(review): presumably whether the final destination checked out — confirm.
  # final_destination_uri_string: the URL the original :uri_string redirected to.
  attr_reader :good, :final_destination_uri_string

  # Builds a Redirect from a parameter hash.
  #
  # @param params [Hash] parameters. Reads :final_destination_uri_string and
  #   :good here; the hash is then handed on to Result#initialize unchanged.
  def initialize(params)
    @final_destination_uri_string, @good =
      params[:final_destination_uri_string], params[:good]
    super
  end
end

# An Error result. The URL is not valid for some reason — any reason other
# than a 200 HTTP response.
class Error < Result
  # The error description supplied under :error.
  attr_reader :error

  # Builds an Error from a parameter hash.
  #
  # @param params [Hash] parameters. Expects :error, a string representing the
  #   error; the hash is then handed on to Result#initialize unchanged.
  def initialize(params)
    @error = params[:error]
    super
  end
end

class UriService
def initialize(solr_document_uri)
@uri = solr_document_uri
@metadata = Hash.new
@metadata['solr_doc_id'] = @uri.document_id
@metadata['uri_key'] = @uri.uri_key
@metadata['uri_value'] = @uri.uri_value
@metadata['solr_version'] = @uri.version
@metadata = {
'solr_doc_id' => @uri.document_id,
'uri_key' => @uri.uri_key,
'uri_value' => @uri.uri_value,
'solr_version' => @uri.version
}

@uri.state_machine.transition_to!(:processing, @metadata)

Expand All @@ -68,60 +24,74 @@ def initialize(solr_document_uri)
)
end

# Captures the uri's validity in SolrDocumentUri
# @return [Boolean]
#
# @TODO: EWL
def process
# Gentle hands.
sleep(1)

uri = normalize_uri(@uri.uri_value)

if uri.scheme.start_with?('http')
result = check_uri(uri)
process_http_uri(uri)
elsif uri.scheme.start_with?('ftp')
process_ftp_uri(uri)
else
@uri.state_machine.transition_to!(:failed, @metadata.merge('error' => 'Unsupported URI scheme'))
end

if result.instance_of?(Good)
@uri.state_machine.transition_to!(:succeeded, @metadata)
elsif result.instance_of?(Redirect)
@metadata["final_destination_uri_string"] = result.final_destination_uri_string
@uri.state_machine.transition_to!(:succeeded, @metadata)
else
@uri.state_machine.transition_to!(:failed, @metadata)
end
log_output
rescue => e
@metadata['exception'] = e.inspect
@uri.state_machine.transition_to!(:failed, @metadata)
log_output
end

elsif uri.scheme.start_with?('ftp')
private

Net::FTP.open(uri.host) do |ftp|
ftp.passive = true
ftp.login 'anonymous', 'anonymous@google.com'
def process_http_uri(uri)
result = check_uri(uri)

# Check for file extension
if File.extname(uri.path).size > 0
size = ftp.size(uri.path)
if result.instance_of?(Good) || result.instance_of?(Redirect)
@uri.state_machine.transition_to!(:succeeded, @metadata)
else
@uri.state_machine.transition_to!(:failed, @metadata)
end
end

def process_ftp_uri(uri)
Net::FTP.open(uri.host) do |ftp|
ftp.passive = true
ftp.login 'anonymous', 'anonymous@google.com'

path = uri.path.sub(/^\//, '') # Remove leading slash if present
if File.extname(path).size > 0
begin
size = ftp.size(path)
if size > 0
@uri.state_machine.transition_to!(:succeeded, @metadata)
else
@uri.state_machine.transition_to!(:failed, @metadata.merge('error' => 'File size is 0'))
end
elsif check_ftp_path(ftp, uri.path)
@uri.state_machine.transition_to!(:succeeded, @metadata)
else
@uri.state_machine.transition_to!(:failed, @metadata)
rescue Net::FTPPermError
@uri.state_machine.transition_to!(:failed, @metadata.merge('error' => 'File not found'))
end
else
begin
if check_ftp_path(ftp, path)
@uri.state_machine.transition_to!(:succeeded, @metadata)
else
@uri.state_machine.transition_to!(:failed, @metadata.merge('error' => 'Directory not found'))
end
rescue Net::FTPPermError
@uri.state_machine.transition_to!(:failed, @metadata.merge('error' => 'Directory not found'))
end
end
end
log_output

rescue Exception => invalid
@metadata['exception'] = invalid.inspect
@uri.state_machine.transition_to!(:failed,@metadata)
log_output
rescue Net::FTPPermError, Net::FTPReplyError => e
@uri.state_machine.transition_to!(:failed, @metadata.merge('error' => e.message))
end

def log_output
@metadata["state"] = @uri.state_machine.current_state
@metadata.each do |key,value|
@logger.tagged(@uri.id, key.to_s) { @logger.info value }
end

# Changes the FTP session into +path+ and reports whether the server's
# working directory afterwards equals "/" followed by +path+.
#
# @param ftp [#chdir, #pwd] an FTP session object (callers in this file pass
#   the object yielded by Net::FTP.open).
# @param path [String] directory to enter; compared as "/#{path}", so it is
#   expected without a leading slash.
# @return [Boolean] result of comparing the reported working directory.
def check_ftp_path(ftp, path)
  ftp.chdir(path)
  working_dir = ftp.pwd
  working_dir == "/#{path}"
end

def check_uri(uri, redirected=false)
Expand Down Expand Up @@ -179,4 +149,56 @@ def check_ftp_path(ftp, path)
# Normalizes a URI string with Addressable and re-parses the normalized text
# with the standard library's URI.
#
# @param uri_string [String] the raw URI text.
# @return [URI::Generic] the object returned by URI.parse for the normalized string.
def normalize_uri(uri_string)
  normalized = Addressable::URI.parse(uri_string).normalize
  URI.parse(normalized.to_s)
end

# Records the state machine's current state under the "state" key of
# @metadata, then writes every metadata entry to @logger at info level,
# tagged with the URI record's id and the entry's key.
def log_output
  machine = @uri.state_machine
  @metadata["state"] = machine.current_state

  @metadata.each_pair do |key, value|
    @logger.tagged(@uri.id, key.to_s) { @logger.info(value) }
  end
end
end

# Base class for the outcome of checking a single URI. The more specific
# outcomes in this file (Good, Redirect, Error) inherit from it.
class Result
  # The URI string this result refers to; readable and writable.
  attr_reader :uri_string
  attr_writer :uri_string

  # Builds a result from a parameter hash.
  #
  # @param params [Hash] parameters; the value stored under :uri_string is kept
  #   (nil when the key is absent).
  def initialize(params)
    @uri_string = params[:uri_string]
  end
end

# A Good result: the URL is valid. Adds no state or behavior beyond Result.
class Good < Result; end

# A Redirect result: the checked URL redirected to another URL.
class Redirect < Result
  # good: the value supplied under :good.
  #   NOTE(review): presumably whether the final destination checked out — confirm.
  # final_destination_uri_string: the URL the original :uri_string redirected to.
  attr_reader :good, :final_destination_uri_string

  # Builds a Redirect from a parameter hash.
  #
  # @param params [Hash] parameters. Reads :final_destination_uri_string and
  #   :good here; the hash is then handed on to Result#initialize unchanged.
  def initialize(params)
    @final_destination_uri_string, @good =
      params[:final_destination_uri_string], params[:good]
    super
  end
end

# An Error result. The URL is not valid for some reason — any reason other
# than a 200 HTTP response.
class Error < Result
  # The error description supplied under :error.
  attr_reader :error

  # Builds an Error from a parameter hash.
  #
  # @param params [Hash] parameters. Expects :error, a string representing the
  #   error; the hash is then handed on to Result#initialize unchanged.
  def initialize(params)
    @error = params[:error]
    super
  end
end
2 changes: 1 addition & 1 deletion docs/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ bundle exec rake geoportal:server
## Test Locally

RAILS_ENV=test bundle exec rake geoportal:test
RAILS_ENV=test bundle exec rake test:system test
RAILS_ENV=test bundle exec rails test:system test
Loading