Skip to content

Commit 4beee85

Browse files
Merge pull request #7 from LCAS/uoa11
added processing for REF UoA11
2 parents 4ac278a + 8e59735 commit 4beee85

File tree

6 files changed

+225
-4
lines changed

6 files changed

+225
-4
lines changed

.github/workflows/figshare-processing.yaml renamed to .github/workflows/lcas-figshare-processing.yaml

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
name: figshare-cache
1+
name: lcas-figshare-cache
22

33
on:
44
workflow_dispatch:
@@ -20,6 +20,10 @@ on:
2020
branches:
2121
- main
2222

23+
concurrency:
24+
group: lcas-figshare-processing-${{ github.ref }}
25+
cancel-in-progress: true
26+
2327
jobs:
2428
update-cache:
2529
runs-on: ubuntu-latest
@@ -67,10 +71,10 @@ jobs:
6771
cd ./output
6872
if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ "${{ github.event.inputs.use_author_cache }}" = "true" ]; then
6973
echo "Running figshare_fetch.py with --use-author-cache (manually triggered)"
70-
python ../figshare_fetch.py --use-author-cache
74+
python ../figshare_fetch.py --use-author-cache -f ../lcas-authors.txt --max-retries 30 --rate-limit-delay 0.1
7175
else
7276
echo "Running figshare_fetch.py without cache (default behavior)"
73-
python ../figshare_fetch.py --rate-limit-delay 1 --max-retries 30
77+
python ../figshare_fetch.py -f ../lcas-authors.txt --max-retries 30 --rate-limit-delay 0.1
7478
fi
7579
7680
- name: Run figshare bibtex (Step 2 - Generate bibtex from CSV)
@@ -81,6 +85,7 @@ jobs:
8185
python ../figshare_bibtex.py
8286
8387
- name: Save Cache from folder ./output
88+
if: always()
8489
uses: actions/cache/save@v5
8590
with:
8691
path: ./output
Lines changed: 146 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,146 @@
1+
name: uoa11-figshare-cache
2+
3+
on:
4+
workflow_dispatch:
5+
inputs:
6+
use_author_cache:
7+
description: 'Use cached author data (instead of refreshing)'
8+
required: false
9+
default: 'false'
10+
type: choice
11+
options:
12+
- 'true'
13+
- 'false'
14+
# schedule:
15+
# - cron: "30 */12 * * *"
16+
# push:
17+
# branches:
18+
# - main
19+
pull_request:
20+
branches:
21+
- main
22+
23+
concurrency:
24+
group: uoa11-figshare-processing-${{ github.ref }}
25+
cancel-in-progress: true
26+
27+
jobs:
28+
update-cache:
29+
runs-on: ubuntu-latest
30+
steps:
31+
- uses: actions/checkout@v3
32+
with:
33+
fetch-depth: 1
34+
35+
- name: Use Cache in folder ./output
36+
id: cache-restore-output
37+
uses: actions/cache/restore@v5
38+
with:
39+
path: ./output
40+
key: uoa11-cache-files-${{ github.run_id }}
41+
restore-keys: |
42+
uoa11-cache-files-
43+
44+
- name: Create output directory if it doesn't exist
45+
run: |
46+
mkdir -p output
47+
find ./output
48+
49+
- run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
50+
51+
- run: |
52+
git config --global user.name 'L-CAS GitHub'
53+
git config --global user.email 'marc@hanheide.net'
54+
55+
- name: Set up Python
56+
uses: actions/setup-python@v4
57+
with:
58+
python-version: '3.10'
59+
60+
- name: Install dependencies
61+
run: |
62+
set -e
63+
python -m pip install --upgrade pip
64+
pip install -r requirements-frozen.txt
65+
66+
- name: Run figshare fetch (Step 1 - Retrieve articles and create CSV)
67+
env:
68+
FIGSHARE_TOKEN: ${{ secrets.FIGSHARE_TOKEN }}
69+
run: |
70+
set -e
71+
cd ./output
72+
if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ "${{ github.event.inputs.use_author_cache }}" = "true" ]; then
73+
echo "Running figshare_fetch.py with --use-author-cache (manually triggered)"
74+
python ../figshare_fetch.py -f ../uoa11-authors.txt --use-author-cache --max-retries 30 --rate-limit-delay 0.1 -o uoa11-figshare_articles.csv -O uoa11-figshare_articles_all.csv
75+
else
76+
echo "Running figshare_fetch.py without cache (default behavior)"
77+
python ../figshare_fetch.py -f ../uoa11-authors.txt --rate-limit-delay 0.1 --max-retries 30 -o uoa11-figshare_articles.csv -O uoa11-figshare_articles_all.csv
78+
fi
79+
80+
- name: Run figshare bibtex (Step 2 - Generate bibtex from CSV)
81+
run: |
82+
set -e
83+
cd ./output
84+
echo "Running figshare_bibtex.py to generate bibtex from CSV"
85+
python ../figshare_bibtex.py -i uoa11-figshare_articles.csv -o uoa11.bib
86+
87+
- name: Save Cache from folder ./output
88+
if: always()
89+
uses: actions/cache/save@v5
90+
with:
91+
path: ./output
92+
key: ${{ steps.cache-restore-output.outputs.cache-primary-key || 'uoa11-cache-files' }}
93+
94+
- name: Generate publication statistics
95+
run: |
96+
cd ./output
97+
python ../generate_stats.py --all-csv uoa11-figshare_articles_all.csv --dedup-csv uoa11-figshare_articles.csv >> $GITHUB_STEP_SUMMARY
98+
99+
- name: Nexus Repo Publish bibtex
100+
if: ${{ github.event_name != 'pull_request' }}
101+
uses: sonatype-nexus-community/nexus-repo-github-action@master
102+
with:
103+
serverUrl: https://lcas.lincoln.ac.uk/repository/
104+
username: ${{ secrets.LCAS_REGISTRY_PUSHER }}
105+
password: ${{ secrets.LCAS_REGISTRY_TOKEN }}
106+
format: raw
107+
repository: misc
108+
coordinates: directory=bibtex
109+
assets: filename=uoa11.bib
110+
filename: ./output/uoa11.bib
111+
112+
- name: Nexus Repo Publish figshare articles without duplicates CSV
113+
if: ${{ github.event_name != 'pull_request' }}
114+
uses: sonatype-nexus-community/nexus-repo-github-action@master
115+
with:
116+
serverUrl: https://lcas.lincoln.ac.uk/repository/
117+
username: ${{ secrets.LCAS_REGISTRY_PUSHER }}
118+
password: ${{ secrets.LCAS_REGISTRY_TOKEN }}
119+
format: raw
120+
repository: misc
121+
coordinates: directory=bibtex
122+
assets: filename=uoa11-figshare_articles.csv
123+
filename: ./output/uoa11-figshare_articles.csv
124+
125+
- name: Nexus Repo Publish all figshare articles CSV
126+
if: ${{ github.event_name != 'pull_request' }}
127+
uses: sonatype-nexus-community/nexus-repo-github-action@master
128+
with:
129+
serverUrl: https://lcas.lincoln.ac.uk/repository/
130+
username: ${{ secrets.LCAS_REGISTRY_PUSHER }}
131+
password: ${{ secrets.LCAS_REGISTRY_TOKEN }}
132+
format: raw
133+
repository: misc
134+
coordinates: directory=bibtex
135+
assets: filename=uoa11-figshare_articles_all.csv
136+
filename: ./output/uoa11-figshare_articles_all.csv
137+
138+
- name: Upload artifacts
139+
if: always()
140+
uses: actions/upload-artifact@v4
141+
with:
142+
name: outputs
143+
path: |
144+
./output/*.csv
145+
./output/*.bib
146+
retention-days: 30

doi2bib.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ def get_bibtext(self, doi):
6464
self.logger.warning(f"failed to get bibtex for {doi}, status code {response.status_code}")
6565
return ""
6666
bibtext = response.text
67+
6768
if bibtext:
6869
self.logger.debug(f"bibtex for {doi} found, caching it")
6970
cache[doi] = bibtext
@@ -84,6 +85,17 @@ def get_bibtex_entry(self, doi):
8485
parser.ignore_nonstandard_types = False
8586
bibdb = bibtexparser.loads(bibtext, parser)
8687
entry, = bibdb.entries
88+
89+
# Correct @inbook entries that should be @inproceedings
90+
# Conference papers often have booktitle but no chapter field
91+
if entry.get('ENTRYTYPE', '').lower() == 'inbook':
92+
has_booktitle = 'booktitle' in entry
93+
has_chapter = 'chapter' in entry
94+
# If it has a booktitle but no chapter, it's likely a proceedings paper
95+
if has_booktitle and not has_chapter:
96+
self.logger.info(f"Converting @inbook to @inproceedings for {doi}")
97+
entry['ENTRYTYPE'] = 'inproceedings'
98+
8799
quoted_doi = urllib.request.quote(doi)
88100
entry['link'] = 'https://doi.org/{}'.format(quoted_doi)
89101
if 'author' in entry:

doi_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ def guess_doi_from_crossref(title, author):
3535
# Construct query URL for Crossref API
3636
base_url = "https://api.crossref.org/works"
3737
params = {
38-
"query.query.bibliographic": f"{title}",
38+
"query.bibliographic": f"{title}",
3939
"query.author": f"{author}",
4040
"sort": "relevance",
4141
"rows": 10, # Get top 10 matches

lcas-authors.txt

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
Marc Hanheide
2+
Marcello Calisti
3+
Grzegorz Cielniak
4+
Simon Parsons
5+
Elizabeth Sklar
6+
Paul Baxter
7+
Petra Bosilj
8+
Heriberto Cuayahuitl
9+
Gautham Das
10+
Francesco Del Duchetto
11+
Charles Fox
12+
Leonardo Guevara
13+
Helen Harman
14+
Mohammed Al-Khafajiy
15+
Alexandr Klimchik
16+
Riccardo Polvara
17+
Athanasios Polydoros
18+
Zied Tayeb
19+
Sepehr Maleki
20+
Junfeng Gao
21+
Tom Duckett
22+
Mini Rai
23+
Amir Ghalamzan Esfahani

uoa11-authors.txt

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
Paul Baxter
2+
Leonardo Guevara
3+
Miao Yu
4+
Francesco Del Duchetto
5+
Alexandr Klimchik
6+
Abimbola Sangodoyin
7+
Athanasios Polydoros
8+
Heriberto Cuayahuitl
9+
Fiona Strens
10+
Gautham Das
11+
Olivier Szymanezyk
12+
John Atanbori
13+
Hamna Aslam
14+
Themis Papaioannou
15+
Bashir Al-Diri
16+
Khaled Bachour
17+
Riccardo Polvara
18+
Ionut Moraru
19+
Renata Ntelia
20+
Charles Fox
21+
Simon Parsons
22+
Mohammed Al-Khafajiy
23+
James Brown
24+
Mark Doughty
25+
Christos Frantzidis
26+
Wenting Duan
27+
Yvonne James
28+
Kabiru Maiyama
29+
Mamatha Thota
30+
Patrick Dickinson
31+
Helen Harman
32+
Marc Hanheide
33+
Elizabeth Sklar
34+
Grzegorz Cielniak
35+

0 commit comments

Comments
 (0)