Add function to retrieve citations for arviz or methods implemented in arviz (#77)

aloctavodia · web-flow · commit 763ea944b4aa · 2025-08-13T10:39:46.000+03:00
* add function to retrieve citations for arviz or methods implemented in arviz

* improve docs and BibTeX format

* update api reference

* remove execution from docstring
diff --git a/docs/source/api/index.md b/docs/source/api/index.md
@@ -46,6 +46,15 @@ More coming soon...
    arviz_base.xarray_var_iter
 ```
 
+## How to cite ArviZ and implemented methods
+
+```{eval-rst}
+.. autosummary::
+   :toctree: generated/
+
+   arviz_base.citations
+```
+
 
 ## Example datasets
 The behaviour of the functions in this section is partially controlled by the
diff --git a/src/arviz_base/__init__.py b/src/arviz_base/__init__.py
@@ -7,6 +7,7 @@
 
 from arviz_base._version import __version__
 from arviz_base.base import dict_to_dataset, generate_dims_coords, make_attrs, ndarray_to_dataarray
+from arviz_base.citations import citations
 from arviz_base.converters import convert_to_dataset, convert_to_datatree
 from arviz_base.datasets import clear_data_home, get_data_home, list_datasets, load_arviz_data
 from arviz_base.io_cmdstanpy import from_cmdstanpy
@@ -29,6 +30,7 @@
 __all__ = [
     "__version__",
     # base
+    "citations",
     "dict_to_dataset",
     "generate_dims_coords",
     "make_attrs",
diff --git a/src/arviz_base/__init__.pyi b/src/arviz_base/__init__.pyi
@@ -14,6 +14,7 @@ from arviz_base.base import (
     make_attrs,
     ndarray_to_dataarray,
 )
+from arviz_base.citations import citations
 from arviz_base.converters import convert_to_dataset, convert_to_datatree
 from arviz_base.datasets import (
     clear_data_home,
@@ -38,6 +39,7 @@ from arviz_base.transform import get_unconstrained_samples
 
 __all__ = [
     "__version__",
+    "citations",
     "dict_to_dataset",
     "generate_dims_coords",
     "make_attrs",
diff --git a/src/arviz_base/citations.py b/src/arviz_base/citations.py
@@ -0,0 +1,105 @@
+"""How to cite ArviZ and its methods."""
+
+import os
+import re
+
+
+def citations(methods=None, filepath=None, format_type="bibtex"):
+    """
+    List citations for ArviZ and the methods implemented in ArviZ.
+
+    Parameters
+    ----------
+    methods : list of callable, optional
+        Methods implemented in ArviZ from which to retrieve citations. The
+        references are looked up in each method's docstring. If ``None`` or empty,
+        only the ArviZ citation is returned.
+    filepath : str, optional
+        Specifies the location to save the file with the citations.
+        If ``None``, the result is returned as a string.
+    format_type : str
+        Specifies in which format the references will be displayed.
+        Currently, only "bibtex" is supported.
+
+    Returns
+    -------
+    str or None
+        The citation text when `filepath` is not given; ``None`` when the text
+        is written to `filepath` instead.
+
+    Raises
+    ------
+    ValueError
+        If `format_type` is not ``"bibtex"``.
+
+    Examples
+    --------
+    >>> from arviz_base import citations
+    >>> from arviz_stats import rhat
+    >>> citations(methods=[rhat])  # Returns how to cite ArviZ and rhat
+    >>> citations()  # Returns how to cite ArviZ
+    """
+    # Validate up front so an unsupported format fails before any work is done.
+    if format_type != "bibtex":
+        raise ValueError("Invalid value for format_type. Use 'bibtex'.")
+
+    # The ArviZ paper is always listed first.
+    # NOTE(review): "XXXXX" looks like a placeholder DOI -- confirm it matches the
+    # entry in references.bib and update it once the final DOI is known.
+    method_citations = [{"doi": "10.21105/joss.XXXXX"}]
+    for method in methods or ():
+        # ``__doc__`` is None for undocumented callables (and under ``python -OO``).
+        _extract_ids_per_entry(method_citations, method.__doc__ or "")
+
+    # An empty ``methods`` is treated like ``None`` so the header has no empty list.
+    citation_text = _find_bibtex_entries(_get_header(methods or None), method_citations)
+    if not filepath:
+        return citation_text
+
+    # references.bib is read as UTF-8; write with the same encoding instead of the
+    # platform default so non-ASCII characters survive on every OS.
+    with open(filepath, "w", encoding="utf-8") as fw:
+        fw.write(citation_text)
+    return None
+
+
+def _extract_ids_per_entry(data, text):
+    """
+    Collect the DOI or URLs cited in each reference of a docstring.
+
+    The docstring is split at numbered reST citation markers (``.. [1]``, ...).
+    For every resulting chunk the first ``doi.org`` link is recorded as
+    ``{"doi": ...}``; if the chunk has no DOI, all other http(s) links in it are
+    recorded (without the scheme) as ``{"urls": [...]}``. Chunks without links
+    are skipped.
+
+    Parameters
+    ----------
+    data : list of dict
+        List to which the identifiers are appended. It is modified in place.
+    text : str or None
+        Docstring to scan. ``None`` or an empty string adds nothing.
+
+    Returns
+    -------
+    list of dict
+        The same `data` list that was passed in.
+    """
+    if not text:
+        # Undocumented callables have ``__doc__ = None``; nothing to extract.
+        return data
+
+    entries = re.split(r"\n\s*\.\. \[\d+\] ", text.strip())
+
+    doi_pattern = re.compile(r"https?://doi\.org/(\S+)", re.IGNORECASE)
+    url_pattern = re.compile(r"https?://(?!doi\.org)(\S+)", re.IGNORECASE)
+    # Links usually end a sentence or clause; drop the punctuation that follows them.
+    trailing = ".,;"
+
+    for entry in entries:
+        doi_match = doi_pattern.search(entry)
+        if doi_match:
+            data.append({"doi": doi_match.group(1).rstrip(trailing)})
+            continue
+        urls = [url.rstrip(trailing) for url in url_pattern.findall(entry)]
+        if urls:
+            data.append({"urls": urls})
+    return data
+
+
+def _find_bibtex_entries(header, data):
+    """
+    Build the citation text from the ``references.bib`` file next to this module.
+
+    Parameters
+    ----------
+    header : str
+        Text placed before the BibTeX entries.
+    data : list of dict
+        Identifiers to look up, each either ``{"doi": str}`` or
+        ``{"urls": list of str}``.
+
+    Returns
+    -------
+    str
+        `header` followed by the first matching BibTeX entry of every identifier,
+        separated by blank lines. Identifiers without a match are skipped and an
+        entry matched more than once is listed only once.
+    """
+    ref_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "references.bib")
+    with open(ref_path, encoding="utf-8") as fr:
+        bibtex_text = fr.read()
+
+    # ``\b`` keeps fields such as ``biburl`` from being mistaken for ``url``.
+    doi_field_pattern = re.compile(r'\bdoi\s*=\s*[{"]([^}"]+)[}"]', re.IGNORECASE)
+    url_field_pattern = re.compile(r'\burl\s*=\s*[{"]([^}"]+)[}"]', re.IGNORECASE)
+
+    # Parse every entry once instead of once per identifier. DOIs are
+    # case-insensitive, so they are compared in lower case.
+    parsed_entries = [
+        (
+            entry.strip(),
+            [doi.lower() for doi in doi_field_pattern.findall(entry)],
+            url_field_pattern.findall(entry),
+        )
+        for entry in re.split(r"\n(?=@)", bibtex_text)
+    ]
+
+    references = [header]
+    for identifier in data:
+        wanted_doi = identifier.get("doi", "").lower()
+        wanted_urls = identifier.get("urls", ())
+        for entry, bib_dois, bib_urls in parsed_entries:
+            # An empty DOI is a substring of everything, so it must never match.
+            doi_hit = bool(wanted_doi) and any(wanted_doi in doi for doi in bib_dois)
+            # The looked-up URLs may lack the scheme that the .bib field carries,
+            # hence the containment test in both directions.
+            url_hit = any(
+                url in b_url or b_url in url for url in wanted_urls for b_url in bib_urls
+            )
+            if doi_hit or url_hit:
+                if entry and entry not in references:
+                    references.append(entry)
+                break
+
+    return "\n\n".join(references)
+
+
+def _get_header(methods=None):
+    """
+    Return the plain-text header that precedes the BibTeX entries.
+
+    Parameters
+    ----------
+    methods : list of callable, optional
+        Methods whose names are listed in the header. If ``None`` or empty,
+        the header only mentions the ArviZ paper.
+
+    Returns
+    -------
+    str
+        The header text.
+    """
+    header = "Bibtex format citations for ArviZ paper\n"
+    if not methods:
+        # Nothing to list: avoid a dangling "for the following methods:".
+        return header
+
+    # Fall back to ``repr`` for callables without ``__name__`` (e.g. ``functools.partial``).
+    methods_str = ", ".join(getattr(method, "__name__", repr(method)) for method in methods)
+    # Drop the trailing newline so the method list continues the same sentence.
+    return header.strip() + f", and\nfor the following methods: {methods_str}"
diff --git a/src/arviz_base/citations.pyi b/src/arviz_base/citations.pyi
@@ -0,0 +1,14 @@
+# File generated with docstub
+
+import os
+import re
+from typing import List
+
+from _typeshed import Incomplete
+
+# NOTE: return types below mirror citations.py; the generated ``-> None`` was wrong
+# because the functions there do return values.
+def citations(
+    methods: List | None = ..., filepath: str | None = ..., format_type: str = ...
+) -> str | None: ...  # str when ``filepath`` is not given, None after writing the file
+def _extract_ids_per_entry(data: Incomplete, text: Incomplete) -> Incomplete: ...  # returns ``data``
+def _find_bibtex_entries(header: Incomplete, data: Incomplete) -> str: ...
+def _get_header(methods: Incomplete = ...) -> str: ...
diff --git a/src/arviz_base/references.bib b/src/arviz_base/references.bib