Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions opac/tests/test_main_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ def test_forbidden_json(self):
self.assertEqual(expected_msg, json_msg)

def test_page_not_found(self):
# When PREVIOUS_WEBSITE_URI is not set, should render 404 template
current_app.config["PREVIOUS_WEBSITE_URI"] = ""
response = self.client.get("/page_not_found")
self.assert_404(response)
self.assertEqual("text/html; charset=utf-8", response.content_type)
Expand All @@ -104,6 +106,8 @@ def test_page_not_found(self):
self.assertEqual(expected_msg, context_msg)

def test_page_not_found_json(self):
# JSON requests should not redirect
current_app.config["PREVIOUS_WEBSITE_URI"] = "https://old.scielo.br"
response = self.client.get(
"/page_not_found", headers={"Accept": "application/json"}
)
Expand All @@ -114,6 +118,24 @@ def test_page_not_found_json(self):
json_msg = response.json["error"]
expected_msg = "<p>%s</p>" % ERROR_MSG
self.assertEqual(expected_msg, json_msg)

def test_page_not_found_with_redirect_to_classic_site(self):
    # With PREVIOUS_WEBSITE_URI configured, an unknown HTML page must answer
    # with a 302 pointing at the same path on the classic site.
    legacy_base = "https://old.scielo.br"
    current_app.config["PREVIOUS_WEBSITE_URI"] = legacy_base

    # Do not follow the redirect: the assertions inspect the 302 itself.
    resp = self.client.get("/page_not_found", follow_redirects=False)

    self.assertEqual(302, resp.status_code)
    self.assertEqual(legacy_base + "/page_not_found", resp.location)

def test_page_not_found_redirect_preserves_query_string(self):
    # The redirect to the classic site must carry the original query string
    # along unchanged.
    legacy_base = "https://old.scielo.br"
    requested = "/page_not_found?param=value&test=123"
    current_app.config["PREVIOUS_WEBSITE_URI"] = legacy_base

    # Do not follow the redirect: the assertions inspect the 302 itself.
    resp = self.client.get(requested, follow_redirects=False)

    self.assertEqual(302, resp.status_code)
    self.assertEqual(legacy_base + requested, resp.location)

def test_internal_server_error(self):
current_app.config["DEBUG"] = False
Expand Down
6 changes: 6 additions & 0 deletions opac/webapp/config/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,11 @@
- OPAC_SITE_LICENSE_NAME: Nome da licença (default: "Creative Common - by 4.0")
- OPAC_SITE_LICENSE_URL: URL da licença (default: https://creativecommons.org/licenses/by-nc/4.0/)
- OPAC_SITE_LICENSE_IMG_URL: Imagem da licença (default: https://licensebuttons.net/l/by/4.0/88x31.png)

- Migration & Legacy Site
- PREVIOUS_WEBSITE_URI: URL do site anterior/clássico (ex: 'https://old.scielo.br').
Quando configurado, se um recurso não for encontrado (404),
o usuário será redirecionado para a mesma URL no site clássico.
"""

PROJECT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
Expand Down Expand Up @@ -640,6 +645,7 @@
APM_VERIFY_SERVER_CERT = os.environ.get("OPAC_APM_APM_VERIFY_SERVER_CERT", "True") == "True"

# Set this to advertise on the website home page that the current site has a
# previous version, and to automatically redirect to the classic site when a
# resource is not found (404). Read from the environment; defaults to "".
PREVIOUS_WEBSITE_URI = os.environ.get("PREVIOUS_WEBSITE_URI", "")

# Caso queira apresentar na home do website qualquer mensagem de texto
Expand Down
89 changes: 87 additions & 2 deletions opac/webapp/main/errors.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# coding: utf-8
from flask import jsonify, render_template, request

from flask import current_app, jsonify, redirect, render_template, request
import re
from . import main
from .helper import build_classic_website_uri
from webapp import controllers


@main.app_errorhandler(400)
Expand Down Expand Up @@ -39,10 +41,93 @@ def page_not_found(e):
response = jsonify({"error": e})
response.status_code = 404
return response

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot muito bem! mas será necessário fazer certo tratamento dentro de opac/webapp/main/views.py para obter a URI completa de cada recurso, ou pelo menos, de journal, issue_toc, article_detail. Consegue?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implementado! ✅

Agora o sistema constrói URLs completas e específicas para o site clássico para cada tipo de recurso:

Journal (/j/abc/) → {PREVIOUS_WEBSITE_URI}/scielo.php?script=sci_serial&pid={ISSN}&lng={lang}&nrm=iso

Issue (/j/abc/i/v1n1/) → {PREVIOUS_WEBSITE_URI}/scielo.php?script=sci_issuetoc&pid={PID}&lng={lang}&nrm=iso

Article (/j/abc/a/xyz/) → {PREVIOUS_WEBSITE_URI}/scielo.php?script=sci_arttext&pid={PID}&lng={lang}&nrm=iso

O código:

  1. Detecta automaticamente o tipo de recurso baseado no padrão da URL
  2. Busca o objeto no banco (journal/issue/article)
  3. Extrai o PID/ISSN apropriado
  4. Constrói a URL do site clássico com os parâmetros corretos
  5. Se não conseguir construir a URL específica, usa o fallback (redirect simples do path)

Commit: e0870ea

# Try to redirect to the classic site if configured
classic_site_url = current_app.config.get("PREVIOUS_WEBSITE_URI", "")
if classic_site_url:
# Try to build a specific classic URL based on the resource type
classic_url = _build_classic_url_for_resource(request.path)

# If we couldn't build a specific URL, fallback to the simple redirect
if not classic_url:
classic_url = classic_site_url.rstrip("/") + request.full_path.rstrip("?")

return redirect(classic_url, code=302)

# If no classic site is configured, show the 404 page
context = {"message": e}
return render_template("errors/404.html", **context), 404


def _build_classic_url_for_resource(path):
"""
Tenta construir uma URL específica para o site clássico com base no tipo de recurso.

Args:
path: o caminho da requisição

Returns:
String com a URL completa para o site clássico ou None se não puder ser construída
"""
# Pattern para journal: /j/<url_seg>/ ou /j/<url_seg>
journal_pattern = r'^/j/([^/]+)/?$'
match = re.match(journal_pattern, path)
if match:
url_seg = match.group(1)
journal = controllers.get_journal_by_url_seg(url_seg)
if journal:
return build_classic_website_uri('journal', journal)

# Pattern para issue: /j/<url_seg>/i/<url_seg_issue>/ ou /j/<url_seg>/i/<url_seg_issue>
issue_pattern = r'^/j/([^/]+)/i/([^/]+)/?$'
match = re.match(issue_pattern, path)
if match:
url_seg = match.group(1)
url_seg_issue = match.group(2)
issue = controllers.get_issue_by_url_seg(url_seg, url_seg_issue)
if issue:
return build_classic_website_uri('issue', issue)

# Pattern para article: /j/<url_seg>/a/<article_pid_v3>/ ou /j/<url_seg>/a/<article_pid_v3>/<part>/
article_pattern = r'^/j/([^/]+)/a/([^/]+)(?:/[^/]+)?/?$'
match = re.match(article_pattern, path)
if match:
url_seg = match.group(1)
article_pid_v3 = match.group(2)
# Tenta obter o artigo pelo aid (v3 PID)
try:
from opac_schema.v1.models import Article
article = Article.objects(aid=article_pid_v3).first()
if article:
return build_classic_website_uri('article', article)
except (AttributeError, ImportError) as e:
current_app.logger.debug(f"Error loading article for classic URL: {e}")

# Pattern para PDF: /pdf/<journal_acron>/<issue_info>/<pdf_filename>.pdf
pdf_pattern = r'^/pdf/([^/]+)/([^/]+)/([^/]+)\.pdf$'
match = re.match(pdf_pattern, path)
if match:
journal_acron = match.group(1)
issue_info = match.group(2)
pdf_filename = match.group(3) + ".pdf"
# Tenta obter o artigo pelo PDF filename
try:
article = controllers.get_article_by_pdf_filename(
journal_acron, issue_info, pdf_filename
)
# Se não encontrou, tenta material suplementar
if not article:
article = controllers.get_article_by_suppl_material_filename(
journal_acron, issue_info, pdf_filename
)
if article:
return build_classic_website_uri('pdf', article)
except (AttributeError, ImportError) as e:
current_app.logger.debug(f"Error loading article PDF for classic URL: {e}")

return None


@main.app_errorhandler(500)
def internal_server_error(e):
if (
Expand Down
65 changes: 64 additions & 1 deletion opac/webapp/main/helper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import datetime
from functools import wraps
from urllib.parse import urlencode

import jwt
from flask import jsonify, request
from flask import jsonify, request, session
from flask_babelex import get_locale
from webapp import controllers
from werkzeug.security import check_password_hash
from flask import current_app
Expand Down Expand Up @@ -68,3 +70,64 @@ def auth():
),
401,
)


def build_classic_website_uri(resource_type, resource=None, **kwargs):
    """
    Build the full classic-site URI for a resource.

    Args:
        resource_type: one of 'journal', 'issue', 'article' or 'pdf'.
        resource: the object the URI points to; its ``print_issn`` /
            ``electronic_issn`` (journal) or ``pid`` (other types) attribute
            supplies the ``pid`` query parameter.
        **kwargs: optional extras; only ``lang`` is read.

    Returns:
        The classic-site URI as a string, or None when PREVIOUS_WEBSITE_URI is
        not configured, the resource type is unknown, the resource is falsy,
        or no identifier is available on the resource.
    """
    base_url = current_app.config.get("PREVIOUS_WEBSITE_URI", "")
    if not base_url:
        return None

    # Explicit ``lang`` wins; otherwise use the session language, falling back
    # to the current locale.
    lang = kwargs.get('lang') or session.get("lang", str(get_locale()))
    # Keep only the first two characters of longer codes (pt_BR -> pt).
    lang = lang[:2] if lang and len(lang) > 2 else lang

    base_url = base_url.rstrip("/")

    # Classic-site script serving each supported resource type.
    script_by_type = (
        ('journal', 'sci_serial'),
        ('issue', 'sci_issuetoc'),
        ('article', 'sci_arttext'),
        ('pdf', 'sci_pdf'),
    )

    try:
        script = next(
            (name for kind, name in script_by_type if resource_type == kind),
            None,
        )
        if script is not None and resource:
            if script == 'sci_serial':
                # Journals are addressed by ISSN: print first, then electronic.
                identifier = (
                    getattr(resource, 'print_issn', None)
                    or getattr(resource, 'electronic_issn', None)
                )
            else:
                identifier = getattr(resource, 'pid', None)

            if identifier:
                params = {'script': script, 'pid': identifier, 'lng': lang, 'nrm': 'iso'}
                return f"{base_url}/scielo.php?{urlencode(params)}"
    except (AttributeError, TypeError) as e:
        # A failure while reading the resource means "no URL", not an error.
        current_app.logger.debug(f"Error building classic URL for {resource_type}: {e}")

    return None
1 change: 1 addition & 0 deletions src/opac-schema
Submodule opac-schema added at 2bb30e
1 change: 1 addition & 0 deletions src/packtools
Submodule packtools added at e3d364
1 change: 1 addition & 0 deletions src/scieloh5m5
Submodule scieloh5m5 added at ad942d