Merge pull request hastagAB#26 from GhostofGoes/master

sjha2048 · web-flow · commit f2a4a5be6b5c · 2018-10-04T16:59:36.000+05:30
Add Slideshare-Downloader script to download SlideShare presentations
diff --git a/Slideshare-Downloader/README.md b/Slideshare-Downloader/README.md
@@ -0,0 +1,23 @@
+# Slideshare-Downloader
+Download slides from slideshows shared on SlideShare (Now LinkedIn SlideShare) as a PDF.
+
+# Usage
+This was written for Python 3, but it should work with Python 2.7 as well.
+
+## Installation 
+### Linux/Mac
+```bash
+python3 -m pip install --user -U -r requirements.txt
+python3 slideshare_downloader.py --help
+```
+
+### Windows
+```powershell
+py -3 -m pip install --user -U -r requirements.txt
+py -3 slideshare_downloader.py --help
+```
+
+## Running
+```bash
+slideshare_downloader.py -f some_slides -u http://www.slideshare.net/codeblue_jp/igor-skochinsky-enpub
+```
diff --git a/Slideshare-Downloader/requirements.txt b/Slideshare-Downloader/requirements.txt
@@ -0,0 +1,4 @@
+beautifulsoup4>=4.0.0
+requests>=2.0.0
+img2pdf>=0.2.1
+docopt>=0.6.0
diff --git a/Slideshare-Downloader/slideshare_downloader.py b/Slideshare-Downloader/slideshare_downloader.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Credit for base code goes to: yodiaditya
+# https://github.com/yodiaditya/slideshare-downloader/blob/master/convertpdf.py
+
+"""SlideShare Downloader.
+
+Usage:
+    slideshare_downloader.py [options]
+
+Options:
+    -h, --help  Show this screen
+    -f <file>   Specify output filename
+    -u <url>    URL to download
+"""
+
+import img2pdf
+from docopt import docopt
+
+from os import walk, mkdir, chdir, getcwd
+from os.path import join
+
+from urllib.parse import urlparse
+from urllib.request import urlopen
+from bs4 import BeautifulSoup
+from requests import get
+
+
+class SlideShare:
+    """ Download slides from SlideShare and convert them into a PDF. """
+    def __init__(self):
+        self.TOP_DIR = getcwd()
+
+    def get_slides(self, download_url=None, filename=None):
+        if download_url:
+            i_dir = self.download_images(download_url)
+        else:
+            i_dir = self.download_images(input('SlideShare full URL (including "http://"): '))
+        if filename:
+            self.create_pdf(i_dir, filename + '.pdf')
+        else:
+            self.create_pdf(i_dir, i_dir + '.pdf')
+
+    @staticmethod
+    def download_images(page_url):
+        html = urlopen(page_url).read()
+        soup = BeautifulSoup(html, 'html.parser')
+        images = soup.findAll('img', {'class': 'slide_image'})  # Parse out the slide images
+        image_dir = soup.title.string.strip(' \t\r\n').lower().replace(' ', '-')  # Get name of the slide deck
+        try:
+            mkdir(image_dir)  # Create the folder for our images
+        except FileExistsError:
+            print("The directory '%s' already exists. Assuming PDF rebuild, continuing with existing contents...\n"
+                  "Delete the directory to re-download the slide images." % image_dir)
+            return image_dir
+        chdir(image_dir)  # Change to image folder so we don't pollute starting folder
+        for image in images:
+            image_url = image.get('data-full').split('?')[0]
+            with open(urlparse(image_url).path.split('/')[-1], "wb") as file:
+                response = get(image_url)
+                file.write(response.content)
+        return image_dir
+
+    def create_pdf(self, image_dir, filename):
+        chdir(join(self.TOP_DIR, image_dir))
+        files = next(walk(join(self.TOP_DIR, image_dir)))[2]
+        with open(join(self.TOP_DIR, filename), "wb") as file:
+            img2pdf.convert(*files, title=filename, outputstream=file)
+
+if __name__ == "__main__":
+    arguments = docopt(__doc__)
+    ss = SlideShare()
+    ss.get_slides(arguments['-u'], arguments['-f'])