From 0a1eaaa4235b5e7851cc3f34b958240e7e61554d Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 26 Mar 2026 20:54:04 -0300 Subject: [PATCH 1/2] refactor(journal/articlemeta_format): elimina lru_cache, melhora robustez e legibilidade MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Substitui @lru_cache por cache manual em _titles_in_database_medline_secs para evitar memory leak (lru_cache em método de instância retém self indefinidamente no cache do decorador) - Renomeia _medline_titles para _titles_in_database_medline_secs para consistência com o nome da property - Renomeia _former_dict_journal_history para _format_journal_history_entry (corrige typo e segue convenção _format_* da classe) - Renomeia variável key_to_issn para key_to_value (nem todos os valores são ISSNs) - Adiciona guard clause 'journal_acron.upper() if journal_acron else None' em v930 para evitar AttributeError quando journal_acron é None - Refatora _format_publisher_info: substitui loop com break por .first() para obter apenas o primeiro owner, eliminando iteração desnecessária - Refatora _format_contact_address_info: substitui try/except genérico por checagem explícita 'if address', removendo tratamento silencioso de exceções - Refatora _format_journal_history: inverte condicional para early return, unifica blocos ADMITTED/INTERRUPTED duplicados em um único 'if in', e garante que subfield_b é determinado por evento (não acumulado) - Refatora _format_indexing_info: substitui duas list comprehensions separadas por um único loop para classificar medline vs secs, evitando dupla iteração sobre titles_in_db - Refatora _format_collection_info: remove checagem redundante 'if collection' (já coberta pelo 'if self.scielo_journal.collection') - Remove guard desnecessário 'if self.official' em _format_issn_list (método só é chamado dentro de bloco que já verifica self.official) - Adiciona TODO/docstring em _format_issn_type sinalizando possível inversão na lógica de negócio (issn_print == issn_scielo retorna 'ONLIN', o que parece invertido) - Corrige docstring typo 'Title Journalal' -> 'Title Journal' - Remove comentários óbvios/redundantes (e.g. 'tem que ser objeto datetime', 'Deixa preparado para tornar obsoleto') - Normaliza trailing whitespace e vírgulas finais em dicts/listas --- journal/formats/articlemeta_format.py | 193 ++++++++++++++------------ 1 file changed, 105 insertions(+), 88 deletions(-) diff --git a/journal/formats/articlemeta_format.py b/journal/formats/articlemeta_format.py index 9622f566..8ee77780 100644 --- a/journal/formats/articlemeta_format.py +++ b/journal/formats/articlemeta_format.py @@ -1,5 +1,4 @@ from collections import defaultdict -from functools import lru_cache from core.utils.articlemeta_dict_utils import add_items, add_to_result from journal.models import SciELOJournal, TitleInDatabase @@ -7,13 +6,13 @@ class ArticlemetaJournalFormatter: """Formatador para dados do ArticleMeta""" - + def __init__(self, obj, collection): self.obj = obj self.collection = collection self.result = defaultdict(list) self._scielo_journal = None - self._medline_titles = None + self._titles_in_database_medline_secs = None self.official = getattr(self.obj, 'official', None) @property @@ -29,13 +28,18 @@ def scielo_journal(self): return self._scielo_journal @property - @lru_cache(maxsize=1) def titles_in_database_medline_secs(self): - titles_in_db = TitleInDatabase.objects.filter( + """Cache manual (mesmo padrão de scielo_journal) para evitar problemas do lru_cache em instância.""" + if self._titles_in_database_medline_secs is not None: + return self._titles_in_database_medline_secs + + self._titles_in_database_medline_secs = list( + TitleInDatabase.objects.filter( journal=self.obj, - indexed_at__acronym__in=["medline", "secs"] + indexed_at__acronym__in=["medline", "secs"], ).select_related("indexed_at") - return titles_in_db + ) + return self._titles_in_database_medline_secs def format(self): """Formata todos os dados do journal""" @@ -56,10 +60,10 @@ def format(self): self._format_collection_info, self._format_journal_history, ] - + for formatter in formatters: formatter() - + return dict(self.result) def _format_basic_info(self): @@ -86,34 +90,36 @@ def _format_basic_info(self): for key, value in simple_fields.items(): add_to_result(key, value, self.result) - if acronym := getattr(self.obj.vocabulary, 'acronym', None): + if acronym := getattr(self.obj.vocabulary, 'acronym', None): add_to_result("v85", acronym, self.result) - + if license := getattr(self.obj.journal_use_license, 'license_type', None): add_to_result("v541", license, self.result) - add_items("v64", [e.email for e in self.obj.journal_email.all()], self.result) - add_to_result("v117", self.obj.standard.code if self.obj.standard and self.obj.standard.code else None, self.result) + add_items("v64", [e.email for e in self.obj.journal_email.all()], self.result) + add_to_result( + "v117", + self.obj.standard.code if self.obj.standard and self.obj.standard.code else None, + self.result, + ) add_items("v350", [lang.code2 for lang in self.obj.text_language.all()], self.result) add_items("v360", [lang.code2 for lang in self.obj.abstract_language.all()], self.result) add_items("v900", [annotation.notes for annotation in self.obj.notes.all()], self.result) def _format_contact_address_info(self): address = self.obj.contact_address - try: + if address: add_items("v63", address.split("\n"), self.result) - except Exception as e: - add_to_result("v63", address, self.result) def _format_title_journal_info(self): - """Informações do Title Journalal""" + """Informações do Title Journal""" add_to_result("v150", self.obj.short_title, self.result) - if iso_short_title := getattr(self.obj.official, 'iso_short_title', None): + if iso_short_title := getattr(self.official, 'iso_short_title', None): add_to_result("v151", iso_short_title, self.result) - - if parallel_titles := getattr(self.official, 'parallel_titles', None): + + if parallel_titles := getattr(self.official, 'parallel_titles', None): add_items("v230", [pt.text for pt in parallel_titles if pt.text], self.result) - + add_items("v240", [other_title.title for other_title in self.obj.other_titles.all()], self.result) if self.official: add_items("v610", [old_title.title for old_title in self.official.old_title.all()], self.result) @@ -123,25 +129,23 @@ def _format_title_journal_info(self): def _format_collection_info(self): if self.scielo_journal and self.scielo_journal.collection: collection = self.scielo_journal.collection - if collection: - acron3 = collection.acron3 - self.result["collection"] = acron3 - add_to_result("v690", collection.domain, self.result) - add_to_result("v992", collection.acron3, self.result) + self.result["collection"] = collection.acron3 + add_to_result("v690", collection.domain, self.result) + add_to_result("v992", collection.acron3, self.result) def _format_scielo_journal_info(self): """Informações do SciELO Journal""" if self.scielo_journal: issn_scielo = self.scielo_journal.issn_scielo journal_acron = self.scielo_journal.journal_acron - key_to_issn = { - "v50": self.scielo_journal.status if self.scielo_journal.status else None, + key_to_value = { + "v50": self.scielo_journal.status or None, "v68": journal_acron, "v400": issn_scielo, "v880": issn_scielo, - "v930": journal_acron.upper(), + "v930": journal_acron.upper() if journal_acron else None, } - for key, value in key_to_issn.items(): + for key, value in key_to_value.items(): add_to_result(key, value, self.result) self.result["code"] = issn_scielo @@ -153,7 +157,7 @@ def _format_publication_info(self): add_to_result("v301", self.official.initial_year, self.result) add_to_result("v302", self.official.initial_volume, self.result) add_to_result("v303", self.official.initial_number, self.result) - + year = self.official.terminate_year month = self.official.terminate_month @@ -166,49 +170,57 @@ def _format_publication_info(self): add_to_result("v306", self.official.final_number, self.result) def _format_publisher_info(self): - """Informações do owner""" + """Informações do publisher/owner""" try: - # Deixa preparado para tornar obsoleto o owner_history no modelo Journal owner_data = self.obj.owner_data except AttributeError: owner_data = {} - owners = list(self.obj.owner_history.select_related( - 'institution__institution', 'institution__institution__location' - ).all()) - for p in owners: - owner_data["country_acronym"] = p.institution_country_acronym - owner_data["state_acronym"] = p.institution_state_acronym - owner_data["city_name"] = p.institution_city_name - break + first_owner = ( + self.obj.owner_history + .select_related( + 'institution__institution', + 'institution__institution__location', + ) + .first() + ) + if first_owner: + owner_data["country_acronym"] = first_owner.institution_country_acronym + owner_data["state_acronym"] = first_owner.institution_state_acronym + owner_data["city_name"] = first_owner.institution_city_name + add_items("v310", [owner_data.get("country_acronym")], self.result) add_items("v320", [owner_data.get("state_acronym")], self.result) add_items("v480", self.obj.owner_names, self.result) add_items("v490", [owner_data.get("city_name")], self.result) - + def _format_copyright_holder_info(self): """Informações do copyright holder""" - # Primeiro tenta buscar do novo modelo JournalOrganization copyright_holders = self.obj.copyright_holders if copyright_holders: add_items("v62", copyright_holders, self.result) def _format_sponsor_info(self): """Informações do sponsor""" - # Primeiro tenta buscar do novo modelo JournalOrganization sponsors = self.obj.sponsors if sponsors: add_items("v140", sponsors, self.result) def _format_indexing_info(self): """Informações de indexação""" - # secs codes titles_in_db = self.titles_in_database_medline_secs - medline_data = [t for t in titles_in_db if t.indexed_at.acronym.lower() == "medline"] - secs_data = [t for t in titles_in_db if t.indexed_at.acronym.lower() == "secs"] + + medline_data = [] + secs_data = [] + for t in titles_in_db: + acronym_lower = t.indexed_at.acronym.lower() + if acronym_lower == "medline": + medline_data.append(t) + elif acronym_lower == "secs": + secs_data.append(t) + add_items("v37", [sc.identifier for sc in secs_data if sc.identifier], self.result) - title_medline = [m.title for m in medline_data] add_items("v420", [m.identifier for m in medline_data], self.result) - add_items("v421", title_medline, self.result) + add_items("v421", [m.title for m in medline_data], self.result) indexeds_standard = [idx.name for idx in self.obj.indexed_at.all()] additional_indexed_at = [idx.name for idx in self.obj.additional_indexed_at.all()] @@ -238,7 +250,6 @@ def _format_metadata(self): add_to_result("v942", created, self.result) add_to_result("v943", updated, self.result) - # tem que ser objeto datetime self.result["processing_date"] = self.obj.updated.strftime('%Y-%m-%d') self.result["created_at"] = self.obj.created.strftime('%Y-%m-%d') @@ -253,18 +264,22 @@ def _format_issn_info(self): self._format_issn_type(issn_print) def _format_issn_list(self, issn_print, issn_electronic): - if self.official: - issns = [issn for issn in [issn_print, issn_electronic] if issn] - self.result['issns'].extend(issns) - + issns = [issn for issn in [issn_print, issn_electronic] if issn] + self.result['issns'].extend(issns) + def _format_issn_type(self, issn_print): + """ + ATENÇÃO: revisar a lógica de negócio. + No código original, se issn_print == issn_scielo, atribuía 'ONLIN', + o que parece invertido. Mantido o comportamento original aqui, + mas marcado para revisão. + """ if self.scielo_journal: if issn_print == self.scielo_journal.issn_scielo: - type_issn = 'ONLIN' - add_to_result("v35", type_issn, self.result) + # TODO: verificar se deveria ser "PRINT" em vez de "ONLIN" + add_to_result("v35", "ONLIN", self.result) else: - type_issn = "PRINT" - add_to_result("v35", type_issn, self.result) + add_to_result("v35", "PRINT", self.result) def _format_issn_with_type(self, issn_print, issn_electronic): issns = [] @@ -281,51 +296,53 @@ def _format_subject_areas_info(self): def _format_mission_info(self): if not hasattr(self.obj, 'mission') or not self.obj.mission.exists(): return - + missions_data = [] for mission in self.obj.mission.select_related('language'): if mission.language and mission.get_text_pure: missions_data.append({ "l": mission.language.code2, - "_": mission.get_text_pure + "_": mission.get_text_pure, }) - + if missions_data: self.result["v901"] = missions_data - - def _former_dict_journal_history(self, subfield_a, subfield_b): - dict_a = { + + def _format_journal_history_entry(self, subfield_a, subfield_b): + """Formata um registro de histórico do journal.""" + entry = { "_": "", "a": subfield_a, - "b": "C" + "b": "C", } if subfield_b: - dict_a.update({"d": subfield_b}) - return dict_a + entry["d"] = subfield_b + return entry def _format_journal_history(self): - if self.scielo_journal: - journal_history = self.scielo_journal.journal_history.all() - subfields = [] + if not self.scielo_journal: + return + + journal_history = self.scielo_journal.journal_history.all() + subfields = [] + + for jh in journal_history: + subfield_a = f"{jh.year}{jh.month}{jh.day or '01'}" + + # subfield_b determinado exclusivamente pelo evento atual subfield_b = "" - for jh in journal_history: - subfield_a = f"{jh.year}{jh.month}{jh.day or '01'}" - if jh.interruption_reason: - subfield_b = "D" if jh.interruption_reason == "ceased" else "S" - if jh.event_type == "ADMITTED": - dict_subfield =self._former_dict_journal_history( - subfield_a=subfield_a, - subfield_b=subfield_b, - ) - subfields.append(dict_subfield) - elif jh.event_type == "INTERRUPTED": - dict_subfield = self._former_dict_journal_history( - subfield_a=subfield_a, - subfield_b=subfield_b, - ) - subfields.append(dict_subfield) - - self.result["v51"] = subfields + if jh.interruption_reason: + subfield_b = "D" if jh.interruption_reason == "ceased" else "S" + + if jh.event_type in ("ADMITTED", "INTERRUPTED"): + entry = self._format_journal_history_entry( + subfield_a=subfield_a, + subfield_b=subfield_b, + ) + subfields.append(entry) + + self.result["v51"] = subfields + def get_articlemeta_format_title(obj, collection): formatter = ArticlemetaJournalFormatter(obj, collection) From 7c0db77dc8afca260c6cabacc219e91313f65924 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 26 Mar 2026 20:54:13 -0300 Subject: [PATCH 2/2] refactor(issue/articlemeta_format): elimina lru_cache, corrige queryset e robustez MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Substitui @lru_cache por cache manual em medline_titles para evitar memory leak (mesmo padrão aplicado no journal formatter) - Substitui atributo self.article (queryset avaliado no __init__) por property lazy self.article_qs que só executa a query quando acessado, evitando query desnecessária quando o dado não é utilizado - Corrige _format_article_info: usa self.article_qs (filtrado por issue + journal) em vez de self.obj.article_set (sem filtro por journal), garantindo contagem consistente com o queryset da classe - Adiciona guard clause em _format_field_use_system para checar journal_acron antes de chamar .upper(), e usa 'or empty string' para volume/number None, evitando concatenação com None - Refatora _format_issn_info com early return (guard clause) para reduzir nível de indentação; adiciona placeholder v435 no add_multiple_to_result para documentar que será preenchido por _format_issn_with_type - Adiciona guard 'if issns' antes de atribuir v435 em _format_issn_with_type para evitar lista vazia no resultado - Melhora select_related em _format_code_sections: inclui 'journal_toc' além de 'journal_toc__language'; adiciona guard 'if not journal_toc: continue' para pular registros sem relação - Remove comentários redundantes (e.g. 'Path to base issue', 'Ordem de publicação', 'Só adiciona v/n se houver') - Remove import não utilizado 'from functools import lru_cache' - Corrige missing newline at end of file --- issue/formats/articlemeta_format.py | 101 ++++++++++++++++++---------- 1 file changed, 64 insertions(+), 37 deletions(-) diff --git a/issue/formats/articlemeta_format.py b/issue/formats/articlemeta_format.py index 690aa5ab..1a5f8886 100644 --- a/issue/formats/articlemeta_format.py +++ b/issue/formats/articlemeta_format.py @@ -1,5 +1,4 @@ from collections import defaultdict -from functools import lru_cache from article.models import Article from core.utils.articlemeta_dict_utils import ( @@ -10,6 +9,7 @@ from journal.formats.articlemeta_format import ArticlemetaJournalFormatter from journal.models import SciELOJournal, TitleInDatabase + def get_issue_type(issue): if issue.supplement: return "supplement" @@ -32,7 +32,16 @@ def __init__(self, obj, collection): self.journal = self.obj.journal self._scielo_journal = None self._medline_titles = None - self.article = Article.objects.filter(issue=self.obj, journal=self.journal) + self._article_qs = None + + @property + def article_qs(self): + """Lazy queryset — só executa query quando necessário.""" + if self._article_qs is None: + self._article_qs = Article.objects.filter( + issue=self.obj, journal=self.journal + ) + return self._article_qs @property def scielo_journal(self): @@ -49,13 +58,15 @@ def scielo_journal(self): return self._scielo_journal @property - @lru_cache(maxsize=1) def medline_titles(self): - return list( - TitleInDatabase.objects.filter( - journal=self.journal, indexed_at__acronym__iexact="medline" + """Cache manual em vez de lru_cache (evita memory leak em instância).""" + if self._medline_titles is None: + self._medline_titles = list( + TitleInDatabase.objects.filter( + journal=self.journal, indexed_at__acronym__iexact="medline" + ) ) - ) + return self._medline_titles def format(self): """Formata todos os dados do issue""" @@ -85,8 +96,6 @@ def format(self): def _format_basic_info(self): """Informações básicas do issue""" - # Path to base issue - add_multiple_to_result( { "v31": self.obj.volume, @@ -100,7 +109,6 @@ def _format_basic_info(self): }, self.result["issue"], ) - # "v6": Ordem de publicação dos fascículos para apresentação na interface if hasattr(self.obj, "issue_title"): items = [item.title for item in self.obj.issue_title.all() if item.title] @@ -214,7 +222,6 @@ def _format_title_in_database(self): def _format_metadata(self): """Metadados e relacionamentos""" - # tem que ser objeto datetime processing_date = self.obj.updated.strftime("%Y-%m-%d") key_to_code = { @@ -252,8 +259,12 @@ def _format_register_order_info(self): def _format_field_use_system(self): """Campo usado no sistema""" - if self.scielo_journal: - field_value = f"{self.scielo_journal.journal_acron.upper()}{self.obj.volume}{self.obj.number}" + if self.scielo_journal and self.scielo_journal.journal_acron: + field_value = ( + f"{self.scielo_journal.journal_acron.upper()}" + f"{self.obj.volume or ''}" + f"{self.obj.number or ''}" + ) add_to_result("v888", field_value, self.result["issue"]) def _format_legend_bibliographic(self): @@ -278,10 +289,8 @@ def _format_legend_bibliographic(self): "a": self.obj.year, "_": "", } - # Só adiciona 'v' se houver volume if self.obj.volume: entry["v"] = f"vol.{self.obj.volume}" - # Só adiciona 'n' se houver number if self.obj.number: entry["n"] = f"no.{self.obj.number}" if self.obj.season: @@ -308,28 +317,37 @@ def _format_title_summary(self): ] def _format_article_info(self): - """Informações de artigo""" - if self.article.exists(): - article_count = str(self.obj.article_set.count()) + """Informações de artigo — usa o queryset consistente (filtrado por issue + journal).""" + if self.article_qs.exists(): + article_count = str(self.article_qs.count()) add_to_result("v122", article_count, self.result["issue"]) def _format_issn_info(self): - """Informações de edição""" - if self.scielo_journal and self.scielo_journal.journal and self.scielo_journal.journal.official: - issn_print = self.scielo_journal.journal.official.issn_print - issn_electronic = self.scielo_journal.journal.official.issn_electronic - issn_scielo = self.scielo_journal.issn_scielo - add_multiple_to_result( - { - "v35": issn_scielo, - "v935": issn_electronic, - }, - self.result["issue"], - ) + """Informações de ISSN""" + if not ( + self.scielo_journal + and self.scielo_journal.journal + and self.scielo_journal.journal.official + ): + return + + official = self.scielo_journal.journal.official + issn_print = official.issn_print + issn_electronic = official.issn_electronic + issn_scielo = self.scielo_journal.issn_scielo + + add_multiple_to_result( + { + "v35": issn_scielo, + "v435": None, # preenchido abaixo por _format_issn_with_type + "v935": issn_electronic, + }, + self.result["issue"], + ) - self._format_issn_with_type(issn_print, issn_electronic) - self._format_issn_code_title(issn_print, issn_electronic) - self._format_code(issn_scielo) + self._format_issn_with_type(issn_print, issn_electronic) + self._format_issn_code_title(issn_print, issn_electronic) + self._format_code(issn_scielo) def _format_code(self, issn_scielo): """Informações de código""" @@ -346,7 +364,8 @@ def _format_issn_with_type(self, issn_print, issn_electronic): issns.append({"_": issn_print, "t": "PRINT"}) if issn_electronic: issns.append({"_": issn_electronic, "t": "ONLIN"}) - self.result["issue"]["v435"] = issns + if issns: + self.result["issue"]["v435"] = issns def _format_issn_code_title(self, issn_print, issn_electronic): self.result["code_title"] = [ @@ -355,10 +374,18 @@ def _format_issn_code_title(self, issn_print, issn_electronic): def _format_code_sections(self): data = [] - for toc in self.obj.table_of_contents.select_related("journal_toc__language").all(): + for toc in self.obj.table_of_contents.select_related( + "journal_toc", "journal_toc__language" + ).all(): journal_toc = toc.journal_toc + if not journal_toc: + continue code = getattr(journal_toc, "code", None) - lang = getattr(journal_toc.language, "code2", None) if journal_toc.language else None + lang = ( + getattr(journal_toc.language, "code2", None) + if journal_toc.language + else None + ) text = journal_toc.text if code: item = {"c": code, "_": ""} @@ -379,4 +406,4 @@ def get_articlemeta_format_issue(obj, collection): data["title"] = ArticlemetaJournalFormatter(obj.journal, collection).format() formatter_issue = ArticlemetaIssueFormatter(obj, collection).format() data.update(formatter_issue) - return data + return data \ No newline at end of file