-
Notifications
You must be signed in to change notification settings - Fork 5
/
create_bibtex.py
executable file
·84 lines (69 loc) · 2.72 KB
/
create_bibtex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env python
import requests
from requests.adapters import HTTPAdapter, Retry
import datetime
from xml.etree import ElementTree
import io
import logging
import argparse
# Logger used for progress messages: INFO and above, written by a stream
# handler as "<LEVEL>: <message>".
logger = logging.getLogger('create bibtex')
logger.setLevel(logging.INFO)
console = logging.StreamHandler()
console.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
logger.addHandler(console)

# Command line: --query is mandatory, --baseurl defaults to the INSPIRE API.
# RawDescriptionHelpFormatter keeps the description/epilog text unwrapped.
parser = argparse.ArgumentParser(
    description='Create bibliography from inspirehep',
    epilog=(
        'example: create_bibtex.py --query '
        'author%3AR.Turra.1%20and%20collection%3APublished '
        'where R.Turra.1 is from here: '
        'https://inspirehep.net/authors?sort=bestmatch&size=25&page=1&q=turra'
    ),
    formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument('--baseurl', default="https://inspirehep.net/api/")
parser.add_argument('--query', required=True, help='query')
args = parser.parse_args()

# Prefix prepended to every query string built below.
BASEURL = args.baseurl
def build_query(**kwargs):
    """Return the ``literature?...`` query string for the given parameters.

    Each keyword argument becomes one ``name=value`` pair, in the order the
    arguments were passed, joined with ``&``. Values are converted with
    ``str()`` and are NOT URL-encoded, so callers must pass values that are
    already safe to place in a URL.
    """
    pairs = []
    for name, value in kwargs.items():
        pairs.append('='.join((name, str(value))))
    return 'literature?' + '&'.join(pairs)
def build_all_queries(nper_step=50, **kwargs):
    """Yield one query string per result page, starting at page 1, forever.

    ``nper_step`` is sent as the ``size`` parameter and the running page
    number as ``page``; any other keyword arguments are forwarded unchanged
    to ``build_query``. The generator never stops on its own: the consumer
    has to break out of the iteration.
    """
    # Overwrites any caller-supplied 'size'/'page' entries.
    kwargs.update(size=nper_step, page=1)
    while True:
        yield build_query(**kwargs)
        kwargs['page'] = kwargs['page'] + 1
# Parameters sent with every page request: the user's search expression and
# the requested response format.
inspire_args = {'q': args.query, 'format': 'bibtex'}

# Seconds to wait on a single request. Without a timeout, requests can block
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 60

# Comment line written after each page of results. It ends with a newline so
# that the first entry of the following page starts on its own line instead
# of being glued to the "%%" comment.
PAGE_SEPARATOR = "%% ===============\n"

pages = []   # text chunks, joined once at the end
nitems = 0   # running count of '@' characters seen in the downloaded pages

# One session (and one retry policy) is shared by all page requests so the
# connection can be reused; the context manager closes it when done.
with requests.Session() as session:
    retry = Retry(total=3, backoff_factor=0.5,
                  status_forcelist=[500, 502, 503, 504])
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)

    for query in build_all_queries(**inspire_args):
        url = BASEURL + query
        r = session.get(url, timeout=REQUEST_TIMEOUT)
        if r.status_code != requests.codes.ok:
            raise IOError("cannot connect to %s, code: %s" % (url, r.status_code))

        content = r.text  # decoded (unicode) response body
        # A page without any '@' contains no entries: this is the only way
        # out of the otherwise endless page generator.
        if '@' not in content:
            break

        # Make sure the separator comment starts on a fresh line.
        if not content.endswith('\n'):
            content += '\n'
        pages.append(content)
        pages.append(PAGE_SEPARATOR)

        nitems += content.count('@')
        logger.info('%d items found...', nitems)

bibtex = ''.join(pages)
logger.info('%d items found', nitems)

# Explicit UTF-8 so that non-ASCII characters in the entries are written the
# same way regardless of the platform's default encoding.
with io.open('bibtex_%s.bib' % str(datetime.date.today()), 'w',
             encoding='utf-8') as f:
    f.write(bibtex)