Skip to content

Commit 3d94547

Browse files
committed
Merge branch 'release/1.0.8'
2 parents f4271c4 + 39a5a7c commit 3d94547

File tree

6 files changed

+118
-6
lines changed

6 files changed

+118
-6
lines changed

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ The entry point is `.src/main.py`, which can be called with the following argume
2020
| `-u` | URL path of this book |
2121
| `-k` | Copyright file |
2222
| `-a` | Donation link |
23-
| `-m` | MathJax support |
24-
| `-p` | Privacy policy URL |
23+
| `-m` | MathJax support |
24+
| `-p` | Privacy policy URL |
25+
| `-w` | Write chapter URLs to Thoth (True/False) |
2526

2627

2728
## Thoth Wrapper (Optional)
@@ -38,12 +39,15 @@ docker run --rm \
3839
-v /path/to/output:/ebook_automation/output \
3940
-e MATHJAX=False \
4041
-e PRIVACYPOLICY_URL=https://example.org \
42+
-e THOTH_EMAIL=email@example.com \
43+
-e THOTH_PWD=password \
4144
openbookpublishers/epublius \
4245
thoth_wrapper.py /ebook_automation/epub_file.epub \
4346
--doi 10.11647/obp.0275
4447
```
4548

4649
The environment variable MATHJAX enables or disables MathJax support
50+
The environment variables THOTH_EMAIL and THOTH_PWD allow use of the `--write-urls` option by providing Thoth credentials
4751

4852
## Contributing
4953

src/epublius/epublius.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ def parse_args(self, argv=None):
7373
parser.add_argument('-p', '--privacy-policy',
7474
help = 'Privacy policy URL')
7575

76+
parser.add_argument('-w', '--write-urls',
77+
help = 'Write HTML chapter URLs to Thoth',
78+
default = False)
79+
7680
return parser.parse_args(argv)
7781

7882
def unzip_epub(self, prefix):

src/epublius/metadata.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,23 @@ def get_chapter_title(self):
126126

127127
return html.escape(ch_title)
128128

129+
def get_chapter_doi(self):
    '''
    Retrieve the chapter DOI from the link inside <p class=doi>
    (that paragraph contains both the copyright statement and
    the DOI link).

    Returns the string of the first <a> element found inside the
    first <p class="doi"> of self.soup, or None when the paragraph,
    the link, or the link's string is missing.
    '''
    doi_node = self.soup.find('p', class_='doi')

    # Not all chapters will have DOIs
    if doi_node is None:
        return None

    doi_link = doi_node.a
    if doi_link is None:
        return None

    # `.string` is itself None when the link has no single text child,
    # so a link without usable text also yields None
    return doi_link.string
145+
129146
def get_css(self):
130147
'''
131148
Return a str with the CSS information of a file

src/epublius/thoth.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/usr/bin/env python3
2+
3+
from os import getenv
4+
from thothlibrary import ThothClient
5+
import urllib.parse
6+
7+
8+
class Thoth:
    '''
    Class to handle Thoth interactions: logging in with credentials
    taken from the environment and writing chapter URLs to Thoth
    '''

    def __init__(self):
        '''
        Create a ThothClient and immediately attempt to log in.
        The outcome is stored in self.logged_in so that callers can
        check it before calling write_urls().
        '''
        self.client = ThothClient()
        self.logged_in = self.login()

    def login(self):
        '''
        Log in to Thoth using the THOTH_EMAIL and THOTH_PWD
        environment variables.

        Returns True when the client login call succeeds, and False
        when either variable is unset or the login call raises.
        '''
        username = getenv('THOTH_EMAIL')
        password = getenv('THOTH_PWD')

        if username is None:
            print('[WARNING] No Thoth username provided '
                  '(THOTH_EMAIL environment variable not set)')
            return False

        if password is None:
            print('[WARNING] No Thoth password provided '
                  '(THOTH_PWD environment variable not set)')
            return False

        try:
            self.client.login(username, password)
        except Exception:
            # Best effort: a failed login is reported through the
            # return value. Catching Exception (rather than using a
            # bare except) lets KeyboardInterrupt and SystemExit
            # propagate instead of being silently swallowed.
            return False

        return True

    def write_urls(self, metadata, book_doi):
        '''
        Write chapter Landing Page and Full Text URLs
        to Thoth in standard OBP format.

        metadata -- object providing get_chapter_doi(), get_page_url(),
                    contents and index for the current chapter
        book_doi -- DOI of the parent book, used to build the
                    landing page URL

        Chapters without a DOI are skipped silently. Exceptions
        raised by the Thoth client calls are not caught here.
        '''
        chapter_doi_full = metadata.get_chapter_doi()

        # Skip chapters that don't have a DOI
        if chapter_doi_full is None:
            return

        work_id = self.client.work_by_doi(chapter_doi_full).workId

        # Keep only the part after 'doi.org/', lower-cased, for use
        # in the landing page URL
        chapter_doi = chapter_doi_full.split('doi.org/')[-1].lower()
        landing_page_root = (
            'https://www.openbookpublishers.com/books/'
            '{book_doi}/chapters/{chapter_doi}')

        publication = {"workId": work_id,
                       "publicationType": "HTML",
                       "isbn": None,
                       "widthMm": None,
                       "widthIn": None,
                       "heightMm": None,
                       "heightIn": None,
                       "depthMm": None,
                       "depthIn": None,
                       "weightG": None,
                       "weightOz": None}
        publication_id = self.client.create_publication(publication)

        location = {"publicationId": publication_id,
                    "landingPage": landing_page_root.format(
                        book_doi=book_doi, chapter_doi=chapter_doi),
                    "fullTextUrl": urllib.parse.unquote_plus(
                        metadata.get_page_url()),
                    "locationPlatform": "OTHER",
                    "canonical": "true"}
        self.client.create_location(location)

        print('{}: URLs written to Thoth'.format(
            metadata.contents[metadata.index]))

src/main.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from epublius.epublius import Epublius
66
from epublius.metadata import Metadata
77
from epublius.output import Output
8+
from epublius.thoth import Thoth
9+
from thothlibrary import ThothError
810

911

1012
def main():
@@ -15,6 +17,11 @@ def main():
1517
# Create instances
1618
epublius = Epublius(work_dir)
1719
output = Output(os.path.abspath('assets/template.xhtml'))
20+
thoth = Thoth()
21+
22+
# Warn if user requested to write URLs to Thoth but Thoth login failed
23+
if epublius.argv.write_urls and not thoth.logged_in:
24+
print('[WARNING] Thoth login failed; URLs will not be written')
1825

1926
# Get book contents
2027
contents = epublius.get_contents()
@@ -57,6 +64,13 @@ def main():
5764
file_path = os.path.join(output_directory, section)
5865
output.write_file(processed_content, file_path)
5966

67+
if epublius.argv.write_urls and thoth.logged_in:
68+
# Write chapter URL metadata to Thoth
69+
try:
70+
thoth.write_urls(metadata, epublius.argv.doi)
71+
except (KeyError, ThothError) as e:
72+
# Continue on error, but display warning
73+
print('[WARNING] Error writing URLs to Thoth for {}: {}'.format(section, e))
6074

6175
# Duplicate TOC file to output_directory/main.html
6276
epublius.duplicate_contents(TOC_filepath.get('TOC_filepath'))

src/thoth_wrapper.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
def query_thoth(doi_url):
    '''
    Run the 'workByDoi' query against Thoth for the given DOI URL
    and return the client's result.

    The DOI is wrapped in double quotes before being passed as the
    'doi' query parameter.
    '''
    thoth = ThothClient()
    return thoth.query('workByDoi', {'doi': f'"{doi_url}"'})
15-
15+
1616
def get_title(thoth_data):
    '''
    Return the value stored under the "fullTitle" key of thoth_data.

    Raises KeyError when the key is absent.
    '''
    return thoth_data["fullTitle"]
1818

@@ -37,9 +37,9 @@ def get_html_pub_url(thoth_data):
3737
def run():
3838
parser = argparse.ArgumentParser(description='Thoth wrapper')
3939
parser.add_argument('epub_path', help='Path to epub file')
40-
parser.add_argument('-d', '--doi', help='Work DOI (registered in Thoth)')
40+
parser.add_argument('-d', '--doi', help='Work DOI (registered in Thoth)', required=True)
4141
args = parser.parse_args()
42-
42+
4343
doi_url = urllib.parse.urljoin('https://doi.org/', args.doi)
4444

4545
thoth_data = query_thoth(doi_url)
@@ -57,7 +57,8 @@ def run():
5757
"-t", os.path.join(epublius_dir, ""),
5858
"-d", args.doi,
5959
"-m", MATHJAX,
60-
"-p", os.getenv('PRIVACYPOLICY_URL', '#')]
60+
"-p", os.getenv('PRIVACYPOLICY_URL', '#'),
61+
"-w", 'True']
6162

6263
os.execvp(sys.executable, [exe] + args)
6364

0 commit comments

Comments
 (0)