Skip to content

Commit f2a4a5b

Browse files
authored
Merge pull request hastagAB#26 from GhostofGoes/master
Add Slideshare-Downloader script to download SlideShare presentations
2 parents 8021694 + 9792e99 commit f2a4a5b

File tree

3 files changed

+100
-0
lines changed

3 files changed

+100
-0
lines changed

Slideshare-Downloader/README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Slideshare-Downloader
2+
Download slides from slideshows shared on SlideShare (Now LinkedIn SlideShare) as a PDF.
3+
4+
# Usage
5+
This script requires Python 3: it imports `urllib.parse` and `urllib.request` and catches `FileExistsError`, none of which exist in Python 2.7.
6+
7+
## Installation
8+
### Linux/Mac
9+
```bash
10+
python3 -m pip install --user -U -r requirements.txt
11+
python3 slideshare_downloader.py --help
12+
```
13+
14+
### Windows
15+
```powershell
16+
py -3 -m pip install --user -U -r requirements.txt
17+
py -3 slideshare_downloader.py --help
18+
```
19+
20+
## Running
21+
```bash
22+
python3 slideshare_downloader.py -f some_slides -u http://www.slideshare.net/codeblue_jp/igor-skochinsky-enpub
23+
```
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
beautifulsoup4>=4.0.0
2+
requests>=2.0.0
3+
img2pdf>=0.2.1
4+
docopt>=0.6.0
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
# Credit for base code goes to: yodiaditya
4+
# https://github.com/yodiaditya/slideshare-downloader/blob/master/convertpdf.py
5+
6+
"""SlideShare Downloader.
7+
8+
Usage:
9+
slideshare_downloader.py [options]
10+
11+
Options:
12+
-h, --help Show this screen
13+
-f <file> Specify output filename
14+
-u <url> URL to download
15+
"""
16+
17+
import img2pdf
18+
from docopt import docopt
19+
20+
from os import walk, mkdir, chdir, getcwd
21+
from os.path import join
22+
23+
from urllib.parse import urlparse
24+
from urllib.request import urlopen
25+
from bs4 import BeautifulSoup
26+
from requests import get
27+
28+
29+
class SlideShare:
    """ Download slides from SlideShare and convert them into a PDF.

    The working directory at construction time is stored in ``TOP_DIR``.
    ``create_pdf`` resolves the image folder and the output PDF relative to
    it. None of the methods change the process working directory.
    """

    def __init__(self):
        self.TOP_DIR = getcwd()

    def get_slides(self, download_url=None, filename=None):
        """ Download a slide deck and write it out as a single PDF.

        :param download_url: Full SlideShare URL. When omitted (or empty) the
            user is prompted for it on stdin.
        :param filename: Output file name. '.pdf' is appended unless the name
            already ends with it. When omitted, the image folder name plus
            '.pdf' is used.
        """
        if not download_url:
            download_url = input('SlideShare full URL (including "http://"): ')
        i_dir = self.download_images(download_url)
        if filename:
            # Avoid producing 'name.pdf.pdf' when the extension was supplied.
            has_suffix = filename.lower().endswith('.pdf')
            pdf_name = filename if has_suffix else filename + '.pdf'
        else:
            pdf_name = i_dir + '.pdf'
        self.create_pdf(i_dir, pdf_name)

    @staticmethod
    def _natural_key(name):
        """ Return a sort key that orders embedded numbers numerically.

        'slide-2-1024.jpg' sorts before 'slide-10-1024.jpg', which a plain
        string comparison gets wrong.
        """
        import re  # Local import: keeps the class self-contained.
        # re.split with a capturing group alternates text, digits, text, ...
        # so odd positions are always digit runs and keys compare type-safely.
        tokens = re.split(r'(\d+)', name)
        return [int(tok) if pos % 2 else tok.lower()
                for pos, tok in enumerate(tokens)]

    @staticmethod
    def download_images(page_url):
        """ Download every slide image of a deck into a new folder.

        The folder is created in the current working directory and named
        after the page title (lower-cased, spaces and path separators
        replaced by '-'). If the folder already exists nothing is downloaded
        and its existing contents are reused.

        :param page_url: URL of the SlideShare presentation page.
        :return: Name of the folder holding the slide images.
        """
        with urlopen(page_url) as page:  # Close the connection when done
            html = page.read()
        soup = BeautifulSoup(html, 'html.parser')
        images = soup.findAll('img', {'class': 'slide_image'})  # Parse out the slide images

        # Get name of the slide deck; fall back to the last URL path segment
        # when the page has no usable <title>.
        title_tag = soup.title
        title = title_tag.string if title_tag is not None else None
        if not title or not title.strip(' \t\r\n'):
            title = urlparse(page_url).path.rstrip('/').split('/')[-1] or 'slides'
        image_dir = title.strip(' \t\r\n').lower().replace(' ', '-')
        # A path separator in the title would make mkdir target a nested path.
        image_dir = image_dir.replace('/', '-').replace('\\', '-')

        try:
            mkdir(image_dir)  # Create the folder for our images
        except FileExistsError:
            print("The directory '%s' already exists. Assuming PDF rebuild, continuing with existing contents...\n"
                  "Delete the directory to re-download the slide images." % image_dir)
            return image_dir

        for image in images:
            full_url = image.get('data-full')
            if not full_url:
                continue  # Tag carries no full-size image URL; nothing to fetch
            image_url = full_url.split('?')[0]
            # Fetch before opening the file so a failed request does not leave
            # an empty or error-page "image" behind.
            response = get(image_url)
            response.raise_for_status()
            image_name = urlparse(image_url).path.split('/')[-1]
            with open(join(image_dir, image_name), "wb") as file:
                file.write(response.content)
        return image_dir

    def create_pdf(self, image_dir, filename):
        """ Combine the images of a folder into one PDF.

        :param image_dir: Image folder, relative to ``TOP_DIR``.
        :param filename: Name of the PDF to write, relative to ``TOP_DIR``.
            It is also used as the PDF title.
        :raises FileNotFoundError: If the image folder cannot be listed.
        """
        source_dir = join(self.TOP_DIR, image_dir)
        listing = next(walk(source_dir), None)
        if listing is None:
            # os.walk yields nothing when the top directory cannot be scanned.
            raise FileNotFoundError("Image directory '%s' does not exist" % source_dir)
        # Directory listing order is arbitrary; sort so pages follow slide order.
        names = sorted(listing[2], key=self._natural_key)
        paths = [join(source_dir, name) for name in names]
        with open(join(self.TOP_DIR, filename), "wb") as file:
            img2pdf.convert(*paths, title=filename, outputstream=file)
69+
70+
if __name__ == "__main__":
    # Parse the command line against the usage text in the module docstring,
    # then hand the URL (-u) and output name (-f) to the downloader. Either
    # may be None when the option was not given.
    cli_options = docopt(__doc__)
    SlideShare().get_slides(cli_options['-u'], cli_options['-f'])

0 commit comments

Comments
 (0)