Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: figshare-cache
name: lcas-figshare-cache

on:
workflow_dispatch:
Expand All @@ -20,6 +20,10 @@ on:
branches:
- main

concurrency:
group: lcas-figshare-processing-${{ github.ref }}
cancel-in-progress: true

jobs:
update-cache:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -67,10 +71,10 @@ jobs:
cd ./output
if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ "${{ github.event.inputs.use_author_cache }}" = "true" ]; then
echo "Running figshare_fetch.py with --use-author-cache (manually triggered)"
python ../figshare_fetch.py --use-author-cache
python ../figshare_fetch.py --use-author-cache -f ../lcas-authors.txt --max-retries 30 --rate-limit-delay 0.1
else
echo "Running figshare_fetch.py without cache (default behavior)"
python ../figshare_fetch.py --rate-limit-delay 1 --max-retries 30
python ../figshare_fetch.py -f ../lcas-authors.txt --max-retries 30 --rate-limit-delay 0.1
fi

- name: Run figshare bibtex (Step 2 - Generate bibtex from CSV)
Expand All @@ -81,6 +85,7 @@ jobs:
python ../figshare_bibtex.py

- name: Save Cache from folder ./output
if: always()
uses: actions/cache/save@v5
with:
path: ./output
Expand Down
146 changes: 146 additions & 0 deletions .github/workflows/uoa11-figshare-processing.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# Workflow: fetch Figshare articles for the authors listed in uoa11-authors.txt,
# generate a bibtex file from the resulting CSV, and publish the outputs.
#
# Steps, in order:
#   1. Restore ./output from a previous run's cache.
#   2. Run figshare_fetch.py (writes the two uoa11 CSV files into ./output).
#   3. Run figshare_bibtex.py (writes uoa11.bib from the de-duplicated CSV).
#   4. Save ./output back to the cache (even when an earlier step failed).
#   5. Append publication statistics to the job summary.
#   6. Publish the .bib and both CSVs to the Nexus repository (skipped on PRs).
#   7. Upload the CSV and bibtex files as build artifacts.
name: uoa11-figshare-cache

on:
  workflow_dispatch:
    inputs:
      use_author_cache:
        description: 'Use cached author data (instead of refreshing)'
        required: false
        default: 'false'
        type: choice
        options:
          - 'true'
          - 'false'
  # schedule:
  #   - cron: "30 */12 * * *"
  # push:
  #   branches:
  #     - main
  pull_request:
    branches:
      - main

# Only one run per ref at a time; a newer run cancels the one in progress.
concurrency:
  group: uoa11-figshare-processing-${{ github.ref }}
  cancel-in-progress: true

jobs:
  update-cache:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 1

      # The primary key contains the run id, so it is unique per run; the
      # restore-keys prefix is what allows an earlier run's cache to be used.
      - name: Use Cache in folder ./output
        id: cache-restore-output
        uses: actions/cache/restore@v5
        with:
          path: ./output
          key: uoa11-cache-files-${{ github.run_id }}
          restore-keys: |
            uoa11-cache-files-

      - name: Create output directory if it doesn't exist
        run: |
          mkdir -p output
          find ./output

      - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."

      - run: |
          git config --global user.name 'L-CAS GitHub'
          git config --global user.email 'marc@hanheide.net'

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'

      - name: Install dependencies
        run: |
          set -e
          python -m pip install --upgrade pip
          pip install -r requirements-frozen.txt

      - name: Run figshare fetch (Step 1 - Retrieve articles and create CSV)
        env:
          FIGSHARE_TOKEN: ${{ secrets.FIGSHARE_TOKEN }}
          # Context values are handed to the script through the environment
          # instead of being expanded into the script text, so their content
          # can never be interpreted as shell code.
          EVENT_NAME: ${{ github.event_name }}
          USE_AUTHOR_CACHE: ${{ github.event.inputs.use_author_cache }}
        run: |
          set -e
          cd ./output
          # The author cache is only used for manual runs that explicitly ask for it.
          if [ "$EVENT_NAME" = "workflow_dispatch" ] && [ "$USE_AUTHOR_CACHE" = "true" ]; then
            echo "Running figshare_fetch.py with --use-author-cache (manually triggered)"
            python ../figshare_fetch.py -f ../uoa11-authors.txt --use-author-cache --max-retries 30 --rate-limit-delay 0.1 -o uoa11-figshare_articles.csv -O uoa11-figshare_articles_all.csv
          else
            echo "Running figshare_fetch.py without cache (default behavior)"
            python ../figshare_fetch.py -f ../uoa11-authors.txt --rate-limit-delay 0.1 --max-retries 30 -o uoa11-figshare_articles.csv -O uoa11-figshare_articles_all.csv
          fi

      - name: Run figshare bibtex (Step 2 - Generate bibtex from CSV)
        run: |
          set -e
          cd ./output
          echo "Running figshare_bibtex.py to generate bibtex from CSV"
          python ../figshare_bibtex.py -i uoa11-figshare_articles.csv -o uoa11.bib

      # Runs even after a failure so partial results in ./output are kept.
      # Reuses the restore step's primary key, with a fixed fallback key.
      - name: Save Cache from folder ./output
        if: always()
        uses: actions/cache/save@v5
        with:
          path: ./output
          key: ${{ steps.cache-restore-output.outputs.cache-primary-key || 'uoa11-cache-files' }}

      - name: Generate publication statistics
        run: |
          cd ./output
          python ../generate_stats.py --all-csv uoa11-figshare_articles_all.csv --dedup-csv uoa11-figshare_articles.csv >> "$GITHUB_STEP_SUMMARY"

      # The three publish steps are skipped for pull requests.
      - name: Nexus Repo Publish bibtex
        if: ${{ github.event_name != 'pull_request' }}
        uses: sonatype-nexus-community/nexus-repo-github-action@master
        with:
          serverUrl: https://lcas.lincoln.ac.uk/repository/
          username: ${{ secrets.LCAS_REGISTRY_PUSHER }}
          password: ${{ secrets.LCAS_REGISTRY_TOKEN }}
          format: raw
          repository: misc
          coordinates: directory=bibtex
          assets: filename=uoa11.bib
          filename: ./output/uoa11.bib

      - name: Nexus Repo Publish figshare articles without duplicates CSV
        if: ${{ github.event_name != 'pull_request' }}
        uses: sonatype-nexus-community/nexus-repo-github-action@master
        with:
          serverUrl: https://lcas.lincoln.ac.uk/repository/
          username: ${{ secrets.LCAS_REGISTRY_PUSHER }}
          password: ${{ secrets.LCAS_REGISTRY_TOKEN }}
          format: raw
          repository: misc
          coordinates: directory=bibtex
          assets: filename=uoa11-figshare_articles.csv
          filename: ./output/uoa11-figshare_articles.csv

      - name: Nexus Repo Publish all figshare articles CSV
        if: ${{ github.event_name != 'pull_request' }}
        uses: sonatype-nexus-community/nexus-repo-github-action@master
        with:
          serverUrl: https://lcas.lincoln.ac.uk/repository/
          username: ${{ secrets.LCAS_REGISTRY_PUSHER }}
          password: ${{ secrets.LCAS_REGISTRY_TOKEN }}
          format: raw
          repository: misc
          coordinates: directory=bibtex
          assets: filename=uoa11-figshare_articles_all.csv
          filename: ./output/uoa11-figshare_articles_all.csv

      # Artifacts are uploaded regardless of earlier step outcomes.
      - name: Upload artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: outputs
          path: |
            ./output/*.csv
            ./output/*.bib
          retention-days: 30
12 changes: 12 additions & 0 deletions doi2bib.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def get_bibtext(self, doi):
self.logger.warning(f"failed to get bibtex for {doi}, status code {response.status_code}")
return ""
bibtext = response.text

if bibtext:
self.logger.debug(f"bibtex for {doi} found, caching it")
cache[doi] = bibtext
Expand All @@ -84,6 +85,17 @@ def get_bibtex_entry(self, doi):
parser.ignore_nonstandard_types = False
bibdb = bibtexparser.loads(bibtext, parser)
entry, = bibdb.entries

# Correct @inbook entries that should be @inproceedings
# Conference papers often have booktitle but no chapter field
if entry.get('ENTRYTYPE', '').lower() == 'inbook':
has_booktitle = 'booktitle' in entry
has_chapter = 'chapter' in entry
# If it has a booktitle but no chapter, it's likely a proceedings paper
if has_booktitle and not has_chapter:
self.logger.info(f"Converting @inbook to @inproceedings for {doi}")
entry['ENTRYTYPE'] = 'inproceedings'

quoted_doi = urllib.request.quote(doi)
entry['link'] = 'https://doi.org/{}'.format(quoted_doi)
if 'author' in entry:
Expand Down
2 changes: 1 addition & 1 deletion doi_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def guess_doi_from_crossref(title, author):
# Construct query URL for Crossref API
base_url = "https://api.crossref.org/works"
params = {
"query.query.bibliographic": f"{title}",
"query.bibliographic": f"{title}",
"query.author": f"{author}",
"sort": "relevance",
"rows": 10, # Get top 10 matches
Expand Down
23 changes: 23 additions & 0 deletions lcas-authors.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
Marc Hanheide
Marcello Calisti
Grzegorz Cielniak
Simon Parsons
Elizabeth Sklar
Paul Baxter
Petra Bosilj
Heriberto Cuayahuitl
Gautham Das
Francesco Del Duchetto
Charles Fox
Leonardo Guevara
Helen Harman
Mohammed Al-Khafajiy
Alexandr Klimchik
Riccardo Polvara
Athanasios Polydoros
Zied Tayeb
Sepehr Maleki
Junfeng Gao
Tom Duckett
Mini Rai
Amir Ghalamzan Esfahani
35 changes: 35 additions & 0 deletions uoa11-authors.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Paul Baxter
Leonardo Guevara
Miao Yu
Francesco Del Duchetto
Alexandr Klimchik
Abimbola Sangodoyin
Athanasios Polydoros
Heriberto Cuayahuitl
Fiona Strens
Gautham Das
Olivier Szymanezyk
John Atanbori
Hamna Aslam
Themis Papaioannou
Bashir Al-Diri
Khaled Bachour
Riccardo Polvara
Ionut Moraru
Renata Ntelia
Charles Fox
Simon Parsons
Mohammed Al-Khafajiy
James Brown
Mark Doughty
Christos Frantzidis
Wenting Duan
Yvonne James
Kabiru Maiyama
Mamatha Thota
Patrick Dickinson
Helen Harman
Marc Hanheide
Elizabeth Sklar
Grzegorz Cielniak