Skip to content

Commit

Permalink
Merge pull request #257 from IFCA-Advanced-Computing/dev/evaluator_base
Browse files Browse the repository at this point in the history
Creating EvaluatorBase
  • Loading branch information
ferag authored Nov 6, 2024
2 parents 885f3f0 + c59b878 commit 80f0a8d
Show file tree
Hide file tree
Showing 12 changed files with 2,587 additions and 598 deletions.
372 changes: 133 additions & 239 deletions api/evaluator.py

Large diffs are not rendered by default.

36 changes: 16 additions & 20 deletions api/rda.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def load_evaluator(wrapped_func):
def wrapper(body, **kwargs):
repo = body.get("repo")
item_id = body.get("id", "")
oai_base = body.get("oai_base")
api_endpoint = body.get("api_endpoint")
lang = body.get("lang", "en")
pattern_to_query = body.get("q", "")

Expand All @@ -36,24 +36,22 @@ def wrapper(body, **kwargs):
# Get the identifiers through a search query
ids = [item_id]

# FIXME oai-pmh should be no different
downstream_logger = evaluator.logger
if repo not in ["oai-pmh"]:
downstream_logger = None
try:
logger.debug("Trying to import plugin from plugins.%s.plugin" % (repo))
plugin = importlib.import_module("plugins.%s.plugin" % (repo), ".")
downstream_logger = plugin.logger
except Exception as e:
logger.error(str(e))
return str(e), 400
if pattern_to_query:
try:
logger.debug("Trying to import plugin from plugins.%s.plugin" % (repo))
plugin = importlib.import_module("plugins.%s.plugin" % (repo), ".")
downstream_logger = plugin.logger
ids = plugin.Plugin.get_ids(
api_endpoint=api_endpoint, pattern_to_query=pattern_to_query
)
except Exception as e:
logger.error(str(e))
return str(e), 400
if pattern_to_query:
try:
ids = plugin.Plugin.get_ids(
oai_base=oai_base, pattern_to_query=pattern_to_query
)
except Exception as e:
logger.error(str(e))
return str(e), 400

# Set handler for evaluator logs
evaluator_handler = ut.EvaluatorLogHandler()
Expand All @@ -66,11 +64,9 @@ def wrapper(body, **kwargs):
result = {}
exit_code = 200
for item_id in ids:
# FIXME oai-pmh should be no different
if repo in ["oai-pmh"]:
eva = evaluator.Evaluator(item_id, oai_base, lang, config=config_data)
else:
eva = plugin.Plugin(item_id, oai_base, lang, config=config_data)
eva = plugin.Plugin(
item_id, api_endpoint, lang, name=repo, config=config_data
)
_result, _exit_code = wrapped_func(body, eva=eva)
logger.debug(
"Raw result returned for indicator ID '%s': %s" % (item_id, _result)
Expand Down
261 changes: 128 additions & 133 deletions api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,26 +128,6 @@ def test_status(points):
return test_status


def oai_identify(oai_base):
    """Send the OAI-PMH ``Identify`` verb to ``oai_base``.

    Args:
        oai_base: Base URL of the OAI-PMH endpoint; the query string is
            appended to it as-is.

    Returns:
        Whatever ``oai_request`` returns for the ``?verb=Identify`` query.
    """
    return oai_request(oai_base, "?verb=Identify")


def oai_metadataFormats(oai_base):
    """Map each metadata prefix offered by an OAI-PMH endpoint to its namespace.

    Issues ``?verb=ListMetadataFormats`` through ``oai_request`` and reads
    every ``metadataFormat`` element found in the response.

    Args:
        oai_base: Base URL of the OAI-PMH endpoint.

    Returns:
        dict: ``{metadataPrefix text: metadataNamespace text}``; empty when the
        response contains no ``metadataFormat`` elements.

    Raises:
        AttributeError: If a ``metadataFormat`` entry lacks either child
            element (``find`` returns ``None`` and ``.text`` is read on it).
    """
    response_tree = oai_request(oai_base, "?verb=ListMetadataFormats")
    format_nodes = response_tree.findall(
        ".//{http://www.openarchives.org/OAI/2.0/}metadataFormat"
    )
    # Key (prefix) is looked up before value (namespace), as in a plain loop.
    return {
        node.find(
            "{http://www.openarchives.org/OAI/2.0/}metadataPrefix"
        ).text: node.find(
            "{http://www.openarchives.org/OAI/2.0/}metadataNamespace"
        ).text
        for node in format_nodes
    }


def is_persistent_id(item_id):
"""Returns boolean if the item id is or not a persistent identifier.
Expand Down Expand Up @@ -448,119 +428,6 @@ def check_metadata_terms_with_values(metadata, terms):
return df_access


def oai_check_record_url(oai_base, metadata_prefix, pid):
    """Probe an OAI-PMH endpoint for a ``GetRecord`` URL that resolves ``pid``.

    Five spellings of the record identifier are tried, always in the same
    order and always all of them. A candidate is accepted when the parsed
    response contains no OAI-PMH ``error`` element. When several candidates
    are accepted, the URL of the LAST accepted one is returned.

    Args:
        oai_base: Base URL of the OAI-PMH endpoint.
        metadata_prefix: Value sent as the ``metadataPrefix`` query parameter.
        pid: Identifier of the record. Its scheme is detected with
            ``idutils``; when detection fails it is treated as "internal"
            and used unchanged.

    Returns:
        str: The accepted ``GetRecord`` URL, or ``""`` when every candidate
        produced an OAI-PMH error.

    Raises:
        Exceptions from ``requests.get`` and from ``ET.fromstring`` (for a
        response body that is not well-formed XML) are not caught here.
    """
    endpoint_root = urllib.parse.urlparse(oai_base).netloc
    try:
        pid_type = idutils.detect_identifier_schemes(pid)[0]
    except Exception as e:
        # No scheme detected (or detection failed): use the pid unchanged.
        pid_type = "internal"
        logging.error(e)
    if pid_type != "internal":
        oai_pid = idutils.normalize_pid(pid, pid_type)
    else:
        oai_pid = pid

    # Part of the identifier after its last "." (whole value if no dot).
    pid_suffix = oai_pid[oai_pid.rfind(".") + 1 :]
    candidate_ids = [
        "oai:%s:%s" % (endpoint_root, oai_pid),
        "%s" % (oai_pid),
        "%s:%s" % (pid_type, oai_pid),
        "oai:%s:%s" % (endpoint_root, pid_suffix),
        "oai:%s:b2rec/%s" % (endpoint_root, pid_suffix),
    ]

    action = "?verb=GetRecord"
    error_path = ".//{http://www.openarchives.org/OAI/2.0/}error"
    url_final = ""
    for position, test_id in enumerate(candidate_ids):
        params = "&metadataPrefix=%s&identifier=%s" % (metadata_prefix, test_id)
        url = oai_base + action + params
        if position == 0:
            # The first probe logs after the request so it can report the
            # HTTP status; the remaining probes log before requesting.
            response = requests.get(url, verify=False, allow_redirects=True)
            logging.debug(
                "Trying ID v1: url: %s | status: %i" % (url, response.status_code)
            )
        else:
            logging.debug("Trying: " + url)
            response = requests.get(url, verify=False)
        # No <error> element in the reply means the endpoint knows this id.
        if not ET.fromstring(response.text).findall(error_path):
            url_final = url

    return url_final


def oai_get_metadata(url):
    """Download ``url`` and parse the response body as XML.

    The request is made with certificate verification disabled and
    redirects allowed.

    Args:
        url: Full URL to fetch.

    Returns:
        The root element of the parsed document, or ``None`` when the body
        cannot be parsed (the error is logged).
    """
    logging.debug("Metadata from: %s" % url)
    response = requests.get(url, verify=False, allow_redirects=True)
    try:
        return ET.fromstring(response.text)
    except Exception as err:
        logging.error("OAI_RQUEST: %s" % err)
        return None


def oai_request(oai_base, action):
    """Perform an OAI-PMH request and return the parsed XML response.

    Args:
        oai_base: Base URL of the OAI-PMH endpoint.
        action: Query string appended verbatim to ``oai_base``
            (for example ``"?verb=Identify"``).

    Returns:
        The root element of the response. When the body cannot be parsed the
        error is logged and an empty ``<OAI-PMH>`` element is returned, so
        callers always receive an element to search.
    """
    # Request to the server (certificate verification is disabled).
    response = requests.get(oai_base + action, verify=False)
    try:
        return ET.fromstring(response.text)
    except Exception as err:
        logging.error("OAI_RQUEST: %s" % err)
        return ET.fromstring("<OAI-PMH></OAI-PMH>")


def find_dataset_file(metadata, url, data_formats):
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
Expand Down Expand Up @@ -784,6 +651,31 @@ def check_standard_project_relation(value):
return False


def oai_request(oai_base, action):
    """Perform an OAI-PMH request and return the parsed XML response.

    Args:
        oai_base: Base URL of the OAI-PMH endpoint.
        action: Query string appended verbatim to ``oai_base``
            (for example ``"?verb=Identify"``).

    Returns:
        The root element of the response. When the body cannot be parsed the
        error is logged and an empty ``<OAI-PMH>`` element is returned, so
        callers always receive an element to search.
    """
    # Request to the server (certificate verification is disabled).
    response = requests.get(oai_base + action, verify=False)
    try:
        return ET.fromstring(response.text)
    except Exception as err:
        logging.error("OAI_RQUEST: %s" % err)
        return ET.fromstring("<OAI-PMH></OAI-PMH>")


def oai_metadataFormats(oai_base):
    """Map each metadata prefix offered by an OAI-PMH endpoint to its namespace.

    Issues ``?verb=ListMetadataFormats`` through ``oai_request`` and reads
    every ``metadataFormat`` element found in the response.

    Args:
        oai_base: Base URL of the OAI-PMH endpoint.

    Returns:
        dict: ``{metadataPrefix text: metadataNamespace text}``; empty when the
        response contains no ``metadataFormat`` elements.

    Raises:
        AttributeError: If a ``metadataFormat`` entry lacks either child
            element (``find`` returns ``None`` and ``.text`` is read on it).
    """
    response_tree = oai_request(oai_base, "?verb=ListMetadataFormats")
    format_nodes = response_tree.findall(
        ".//{http://www.openarchives.org/OAI/2.0/}metadataFormat"
    )
    # Key (prefix) is looked up before value (namespace), as in a plain loop.
    return {
        node.find(
            "{http://www.openarchives.org/OAI/2.0/}metadataPrefix"
        ).text: node.find(
            "{http://www.openarchives.org/OAI/2.0/}metadataNamespace"
        ).text
        for node in format_nodes
    }


def get_rdf_metadata_format(oai_base):
rdf_schemas = []
try:
Expand All @@ -797,6 +689,109 @@ def get_rdf_metadata_format(oai_base):
return rdf_schemas


def oai_check_record_url(oai_base, metadata_prefix, pid):
    """Probe an OAI-PMH endpoint for a ``GetRecord`` URL that resolves ``pid``.

    Five spellings of the record identifier are tried, always in the same
    order and always all of them. A candidate is accepted when the parsed
    response contains no OAI-PMH ``error`` element. When several candidates
    are accepted, the URL of the LAST accepted one is returned.

    Args:
        oai_base: Base URL of the OAI-PMH endpoint.
        metadata_prefix: Value sent as the ``metadataPrefix`` query parameter.
        pid: Identifier of the record. Its scheme is detected with
            ``idutils``; when detection fails it is treated as "internal"
            and used unchanged.

    Returns:
        str: The accepted ``GetRecord`` URL, or ``""`` when every candidate
        produced an OAI-PMH error.

    Raises:
        Exceptions from ``requests.get`` and from ``ET.fromstring`` (for a
        response body that is not well-formed XML) are not caught here.
    """
    endpoint_root = urllib.parse.urlparse(oai_base).netloc
    try:
        pid_type = idutils.detect_identifier_schemes(pid)[0]
    except Exception as e:
        # No scheme detected (or detection failed): use the pid unchanged.
        pid_type = "internal"
        logging.error(e)
    if pid_type != "internal":
        oai_pid = idutils.normalize_pid(pid, pid_type)
    else:
        oai_pid = pid

    # Part of the identifier after its last "." (whole value if no dot).
    pid_suffix = oai_pid[oai_pid.rfind(".") + 1 :]
    candidate_ids = [
        "oai:%s:%s" % (endpoint_root, oai_pid),
        "%s" % (oai_pid),
        "%s:%s" % (pid_type, oai_pid),
        "oai:%s:%s" % (endpoint_root, pid_suffix),
        "oai:%s:b2rec/%s" % (endpoint_root, pid_suffix),
    ]

    action = "?verb=GetRecord"
    error_path = ".//{http://www.openarchives.org/OAI/2.0/}error"
    url_final = ""
    for position, test_id in enumerate(candidate_ids):
        params = "&metadataPrefix=%s&identifier=%s" % (metadata_prefix, test_id)
        url = oai_base + action + params
        if position == 0:
            # The first probe logs after the request so it can report the
            # HTTP status; the remaining probes log before requesting.
            response = requests.get(url, verify=False, allow_redirects=True)
            logging.debug(
                "Trying ID v1: url: %s | status: %i" % (url, response.status_code)
            )
        else:
            logging.debug("Trying: " + url)
            response = requests.get(url, verify=False)
        # No <error> element in the reply means the endpoint knows this id.
        if not ET.fromstring(response.text).findall(error_path):
            url_final = url

    return url_final


def oai_get_metadata(url):
    """Download ``url`` and parse the response body as XML.

    The request is made with certificate verification disabled and
    redirects allowed. Messages go to the module-level ``logger``.

    Args:
        url: Full URL to fetch.

    Returns:
        The root element of the parsed document, or ``None`` when the body
        cannot be parsed (the error is logged).
    """
    logger.debug("Metadata from: %s" % url)
    response = requests.get(url, verify=False, allow_redirects=True)
    try:
        return ET.fromstring(response.text)
    except Exception as err:
        logger.error("OAI_RQUEST: %s" % err)
        return None


def licenses_list():
url = "https://spdx.org/licenses/licenses.json"
headers = {"Accept": "application/json"} # Type of response accpeted
Expand Down
Loading

0 comments on commit 80f0a8d

Please sign in to comment.