Creating EvaluatorBase #257

Merged
merged 15 commits on Nov 6, 2024
269 changes: 74 additions & 195 deletions api/evaluator.py
@@ -6,6 +6,7 @@
import sys
import urllib
import xml.etree.ElementTree as ET
from abc import ABC, abstractmethod
from functools import wraps

import idutils
@@ -63,53 +64,7 @@ def wrapper(plugin, **kwargs):
return wrapper


class ConfigTerms(property):
def __init__(self, term_id):
self.term_id = term_id

def __call__(self, wrapped_func):
@wraps(wrapped_func)
def wrapper(plugin, **kwargs):
metadata = plugin.metadata
has_metadata = True

term_list = ast.literal_eval(plugin.config[plugin.name][self.term_id])
# Get values in config for the given term
if not term_list:
msg = (
"Cannot find any value for term <%s> in configuration"
% self.term_id
)
has_metadata = False
else:
# Get metadata associated with the term ID
term_metadata = pd.DataFrame(
term_list, columns=["element", "qualifier"]
)
term_metadata = ut.check_metadata_terms_with_values(
metadata, term_metadata
)
if term_metadata.empty:
msg = (
"No access information can be found in the metadata for: %s. Please double-check the value/s provided for '%s' configuration parameter"
% (term_list, self.term_id)
)
has_metadata = False

if not has_metadata:
logger.warning(msg)
return (0, [{"message": msg, "points": 0}])

# Update kwargs with collected metadata for the required terms
kwargs.update(
{self.term_id: {"list": term_list, "metadata": term_metadata}}
)
return wrapped_func(plugin, **kwargs)

return wrapper


class Evaluator(object):
class EvaluatorBase(ABC):
"""A class used to define FAIR indicators tests. It contains all the references to all the tests

...
@@ -126,115 +81,79 @@ class Evaluator(object):
lang : Language
"""

def __init__(self, item_id, oai_base=None, lang="en", plugin=None, config=None):
def __init__(self, item_id, oai_base=None, lang="en", config=None, name=None):
self.item_id = item_id
self.oai_base = oai_base
self.metadata = None
self.access_protocols = []
self.cvs = []
self.config = config
# configuration terms
self.terms_access_metadata = pd.DataFrame()
self.terms_license_metadata = pd.DataFrame()

logger.debug("OAI_BASE IN evaluator: %s" % oai_base)
if oai_base is not None and oai_base != "" and self.metadata is None:
metadataFormats = ut.oai_metadataFormats(oai_base)
dc_prefix = ""
for e in metadataFormats:
if metadataFormats[e] == "http://www.openarchives.org/OAI/2.0/oai_dc/":
dc_prefix = e
logger.debug("DC_PREFIX: %s" % dc_prefix)

try:
id_type = idutils.detect_identifier_schemes(self.item_id)[0]
except Exception as e:
id_type = "internal"

logger.debug("Trying to get metadata")
try:
item_metadata = ut.oai_get_metadata(
ut.oai_check_record_url(oai_base, dc_prefix, self.item_id)
).find(".//{http://www.openarchives.org/OAI/2.0/}metadata")
except Exception as e:
logger.error("Problem getting metadata: %s" % e)
item_metadata = ET.fromstring("<metadata></metadata>")
data = []
for tags in item_metadata.findall(".//"):
metadata_schema = tags.tag[0 : tags.tag.rfind("}") + 1]
element = tags.tag[tags.tag.rfind("}") + 1 : len(tags.tag)]
text_value = tags.text
qualifier = None
data.append([metadata_schema, element, text_value, qualifier])
self.metadata = pd.DataFrame(
data, columns=["metadata_schema", "element", "text_value", "qualifier"]
)

if self.metadata is not None:
if len(self.metadata) > 0:
self.access_protocols = ["http", "oai-pmh"]

# Config attributes
self.name = plugin
if self.name == None:
self.name = "oai-pmh"
try:
self.identifier_term = ast.literal_eval(
self.config[self.name]["identifier_term"]
)
self.terms_quali_generic = ast.literal_eval(
self.config[self.name]["terms_quali_generic"]
)
self.terms_quali_disciplinar = ast.literal_eval(
self.config[self.name]["terms_quali_disciplinar"]
)
self.terms_access = ast.literal_eval(self.config[self.name]["terms_access"])
self.terms_cv = ast.literal_eval(self.config[self.name]["terms_cv"])
self.supported_data_formats = ast.literal_eval(
self.config[self.name]["supported_data_formats"]
)
self.terms_qualified_references = ast.literal_eval(
self.config[self.name]["terms_qualified_references"]
)
self.terms_relations = ast.literal_eval(
self.config[self.name]["terms_relations"]
)
self.terms_license = ast.literal_eval(
self.config[self.name]["terms_license"]
)
self.metadata_quality = 100 # Value for metadata quality
self.terms_access_protocols = ast.literal_eval(
self.config[self.name]["terms_access_protocols"]
)
self.metadata_standard = ast.literal_eval(
self.config[self.name]["metadata_standard"]
)
self.fairsharing_username = ast.literal_eval(
self.config["fairsharing"]["username"]
)
self.identifier_term = ast.literal_eval(
self.config[self.name]["identifier_term"]
)
self.terms_quali_generic = ast.literal_eval(
self.config[self.name]["terms_quali_generic"]
)
self.terms_quali_disciplinar = ast.literal_eval(
self.config[self.name]["terms_quali_disciplinar"]
)
self.terms_cv = ast.literal_eval(self.config[self.name]["terms_cv"])
self.supported_data_formats = ast.literal_eval(
self.config[self.name]["supported_data_formats"]
)
self.terms_qualified_references = ast.literal_eval(
self.config[self.name]["terms_qualified_references"]
)
self.terms_relations = ast.literal_eval(
self.config[self.name]["terms_relations"]
)
self.metadata_access_manual = ast.literal_eval(
self.config[self.name]["metadata_access_manual"]
)
self.data_access_manual = ast.literal_eval(
self.config[self.name]["data_access_manual"]
)
self.terms_access_protocols = ast.literal_eval(
self.config[self.name]["terms_access_protocols"]
)

self.fairsharing_password = ast.literal_eval(
self.config["fairsharing"]["password"]
)
self.fairsharing_metadata_path = ast.literal_eval(
self.config["fairsharing"]["metadata_path"]
)
self.fairsharing_formats_path = ast.literal_eval(
self.config["fairsharing"]["formats_path"]
)
self.internet_media_types_path = ast.literal_eval(
self.config["internet media types"]["path"]
)
self.metadata_schemas = ast.literal_eval(
self.config[self.name]["metadata_schemas"]
)
except Exception as e:
logger.error("Problem loading plugin config: %s" % e)
# self.vocabularies = ast.literal_eval(self.config[self.name]["vocabularies"])

# Translations
self.lang = lang
logger.debug("El idioma es: %s" % self.lang)
logger.debug("METAdata: %s" % self.metadata)
self.dict_vocabularies = ast.literal_eval(
self.config[self.name]["dict_vocabularies"]
)

self.vocabularies = list(self.dict_vocabularies.keys())
self.metadata_standard = ast.literal_eval(
self.config[self.name]["metadata_standard"]
)

self.metadata_authentication = ast.literal_eval(
self.config[self.name]["metadata_authentication"]
)
self.metadata_persistence = ast.literal_eval(
self.config[self.name]["metadata_persistence"]
)
self.terms_vocabularies = ast.literal_eval(
self.config[self.name]["terms_vocabularies"]
)

self.fairsharing_username = ast.literal_eval(
self.config["fairsharing"]["username"]
)

self.fairsharing_password = ast.literal_eval(
self.config["fairsharing"]["password"]
)
self.fairsharing_metadata_path = ast.literal_eval(
self.config["fairsharing"]["metadata_path"]
)
self.fairsharing_formats_path = ast.literal_eval(
self.config["fairsharing"]["formats_path"]
)
self.internet_media_types_path = ast.literal_eval(
self.config["internet media types"]["path"]
)
global _
_ = self.translation()
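
Note that the constructor reads every one of these settings with ast.literal_eval, so the plugin configuration has to store Python literals as strings. A minimal sketch of the layout that parsing expects, assuming the usual configparser-style INI file; the section and key names follow the lookups above, but the values are purely illustrative:

import ast
import configparser

# Illustrative configuration; only the section and key names are taken from
# the lookups in __init__ above, the values are made up for this sketch.
config = configparser.ConfigParser()
config.read_string(
    """
[oai-pmh]
identifier_term = ['identifier']
terms_quali_generic = [['contributor', None], ['date', None]]
terms_access = [['access', ''], ['rights', '']]
dict_vocabularies = {'ORCID': 'https://orcid.org'}

[fairsharing]
username = ''
password = ''
metadata_path = 'static/fairsharing_metadata.json'
formats_path = 'static/fairsharing_formats.json'

[internet media types]
path = 'static/internet_media_types.csv'
"""
)

# Each value round-trips back into a Python object, mirroring __init__:
terms_access = ast.literal_eval(config["oai-pmh"]["terms_access"])
assert terms_access == [['access', ''], ['rights', '']]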

@@ -267,6 +186,11 @@ def eval_persistency(self, id_list, data_or_metadata="(meta)data"):

return (points, msg_list)

@abstractmethod
def get_metadata(self):
"""Method to be implemented by plugins."""
raise NotImplementedError("Derived class must implement get_metadata method")
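
With the base class now abstract, each plugin must supply its own get_metadata. A hypothetical sketch of a minimal implementation, assuming the module path implied by this diff and reusing the DataFrame layout the old Evaluator constructor built; the metadata rows themselves are invented for the example:

import pandas as pd

from api.evaluator import EvaluatorBase


class Plugin(EvaluatorBase):
    def get_metadata(self):
        # A real plugin would harvest metadata for self.item_id from its
        # repository; two hard-coded Dublin Core rows stand in for that here.
        rows = [
            ["http://purl.org/dc/elements/1.1/", "identifier", self.item_id, None],
            ["http://purl.org/dc/elements/1.1/", "title", "Example dataset", None],
        ]
        return pd.DataFrame(
            rows, columns=["metadata_schema", "element", "text_value", "qualifier"]
        )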

def eval_uniqueness(self, id_list, data_or_metadata="(meta)data"):
points = 0
msg_list = []
@@ -1423,11 +1347,12 @@ def rda_i3_01m(self, **kwargs):
term_metadata = term_data["metadata"]
id_list = []
for index, row in term_metadata.iterrows():
logging.debug(self.item_id)
logger.debug(self.item_id)

if row["text_value"].split("/")[-1] not in self.item_id:
id_list.append(row["text_value"])
points, msg_list = self.eval_persistency(id_list)
return (points, msg_list)

def rda_i3_01d(self):
"""Indicator RDA-A1-01M.
@@ -2066,49 +1991,3 @@ def check_standard_license(self, license_id_or_url):
% _url
)
return license_name


class ConfigTerms(property):
def __init__(self, term_id):
self.term_id = term_id

def __call__(self, wrapped_func):
@wraps(wrapped_func)
def wrapper(plugin, **kwargs):
metadata = plugin.metadata
has_metadata = True

term_list = ast.literal_eval(plugin.config[plugin.name][self.term_id])
# Get values in config for the given term
if not term_list:
msg = (
"Cannot find any value for term <%s> in configuration"
% self.term_id
)
has_metadata = False
else:
# Get metadata associated with the term ID
term_metadata = pd.DataFrame(
term_list, columns=["element", "qualifier"]
)
term_metadata = ut.check_metadata_terms_with_values(
metadata, term_metadata
)
if term_metadata.empty:
msg = (
"No access information can be found in the metadata for: %s. Please double-check the value/s provided for '%s' configuration parameter"
% (term_list, self.term_id)
)
has_metadata = False

if not has_metadata:
logger.warning(msg)
return (0, msg)

# Update kwargs with collected metadata for the required terms
kwargs.update(
{self.term_id: {"list": term_list, "metadata": term_metadata}}
)
return wrapped_func(plugin, **kwargs)

return wrapper
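
For context, ConfigTerms is meant to wrap indicator methods in a plugin: it resolves the configured term list, matches it against the harvested metadata, and hands both to the method through kwargs, or short-circuits with zero points when nothing matches. A hedged sketch of that usage; the term_id, method body and scoring are illustrative:

from api.evaluator import ConfigTerms, EvaluatorBase


class ExamplePlugin(EvaluatorBase):
    def get_metadata(self):
        return None  # metadata harvesting omitted in this sketch

    @ConfigTerms(term_id="terms_access")
    def rda_a1_01m(self, **kwargs):
        # The decorator injects {"list": [...], "metadata": DataFrame} under
        # the "terms_access" key before this body runs.
        term_metadata = kwargs["terms_access"]["metadata"]
        points = 100 if not term_metadata.empty else 0
        return (points, [{"message": "Access terms found", "points": points}])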
30 changes: 12 additions & 18 deletions api/rda.py
@@ -36,24 +36,22 @@ def wrapper(body, **kwargs):
# Get the identifiers through a search query
ids = [item_id]

# FIXME oai-pmh should be no different
downstream_logger = evaluator.logger
Collaborator:
downstream_logger = None (evaluator is not set)

Collaborator (Author):
Done

if repo not in ["oai-pmh"]:
try:
logger.debug("Trying to import plugin from plugins.%s.plugin" % (repo))
plugin = importlib.import_module("plugins.%s.plugin" % (repo), ".")
downstream_logger = plugin.logger
except Exception as e:
logger.error(str(e))
return str(e), 400
if pattern_to_query:
try:
logger.debug("Trying to import plugin from plugins.%s.plugin" % (repo))
plugin = importlib.import_module("plugins.%s.plugin" % (repo), ".")
downstream_logger = plugin.logger
ids = plugin.Plugin.get_ids(
oai_base=oai_base, pattern_to_query=pattern_to_query
)
except Exception as e:
logger.error(str(e))
return str(e), 400
if pattern_to_query:
try:
ids = plugin.Plugin.get_ids(
oai_base=oai_base, pattern_to_query=pattern_to_query
)
except Exception as e:
logger.error(str(e))
return str(e), 400

# Set handler for evaluator logs
evaluator_handler = ut.EvaluatorLogHandler()
@@ -66,11 +64,7 @@ def wrapper(body, **kwargs):
result = {}
exit_code = 200
for item_id in ids:
# FIXME oai-pmh should be no different
if repo in ["oai-pmh"]:
eva = evaluator.Evaluator(item_id, oai_base, lang, config=config_data)
else:
eva = plugin.Plugin(item_id, oai_base, lang, config=config_data)
eva = plugin.Plugin(item_id, oai_base, lang, config=config_data)
_result, _exit_code = wrapped_func(body, eva=eva)
logger.debug(
"Raw result returned for indicator ID '%s': %s" % (item_id, _result)