First commit

RahulShaw · Jan 9, 2021 · 09e9fce · 09e9fce
1 parent 40445d7
commit 09e9fce
Show file tree

Hide file tree

Showing 5 changed files with 250 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+.git
diff --git a/README.md b/README.md
@@ -0,0 +1,41 @@
+# Download videos from ITPROTV / ITPROTV-DL
+
+###### Python script to download videos from your ITPROTV account for offline viewing
+
+![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg)
+
+![ITPROTV-DL](https://i.imgur.com/iW2ilOD.png)
+
+### Requirements
+- Python 3.6 and above
+- BeautifulSoup - https://pypi.org/project/beautifulsoup4/
+- requests - https://pypi.org/project/requests/
+- html5lib - https://pypi.org/project/html5lib/
+- Selenium - https://pypi.org/project/selenium/
+- tqdm - https://pypi.org/project/tqdm/
+- ChromeDriver - http://chromedriver.chromium.org/
+- Get cookies.txt - https://bit.ly/GoogleChrome-GetCookiesTxt
+- Active subscription on itpro.tv
+
+### Usage
+
+> Clone the repo
+
+> Run `pip install -r requirements.txt`
+
+> Login to itpro.tv and visit the course page e.g. https://app.itpro.tv/course/mta-security-fundamentals-98367-2018/ and with the `Get cookies.txt` extension installed, click on the icon of the extension and click on `Export`. 
+
+![Get cookies.txt](https://i.imgur.com/6QkV9RC.png)
+
+> Rename the downloaded `itpro.tv_cookies.txt` file to `cookies.txt` and copy it to root of the cloned repo. Make sure that the name of the file is ``cookies.txt``. Repeat when you encounter an exception while downloading the videos (assuming you have an active subscription).
+
+> course_link e.g. https://app.itpro.tv/course/mta-security-fundamentals-98367-2018/
+
+``` python
+>>> python driver.py course_link
+```
+
+I'd be gratified to have your support - 
+
+[<img src="https://i.imgur.com/ngduQd7.png">](https://www.buymeacoffee.com/RahulShaw)
+
diff --git a/baker.py b/baker.py
@@ -0,0 +1,28 @@
+import json
+import os
+
+
+def bake():
+    cookies = []
+
+    if os.path.exists(os.getcwd() + os.path.sep + "cookies.txt"):
+        with open('cookies.txt', 'r') as f:
+            lines = f.readlines()
+    else:
+        raise Exception('The "cookies.txt" file was not found')
+
+    for line in lines:
+        if line.startswith('#') or (len(line.strip()) == 0):
+            pass
+        else:
+            line = line.replace('\n', '').split('\t')
+            if line.__len__() == 7:
+                cookie = dict(domain=line[0].strip(), flag=bool((line[1].strip() == 'TRUE')), path=line[2].strip(),
+                              secure=bool((line[3].strip() == 'TRUE')), expiration=line[4].strip(), name=line[5].strip(), value=line[6].strip())
+                cookies.append(cookie)
+            else:
+                raise Exception('Malformed cookies.txt file')
+
+    with open('cookies.json', 'w') as f:
+        f.write(json.dumps(cookies, indent=4))
+        print("Cookies baked!")
diff --git a/driver.py b/driver.py
@@ -0,0 +1,175 @@
+from __future__ import unicode_literals
+
+import json
+import os
+import platform
+import re
+import sys
+import time
+
+import requests
+from bs4 import BeautifulSoup
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from tqdm import tqdm
+
+import baker
+
+if not (sys.version_info.major == 3 and sys.version_info.minor >= 6):
+    print("This script requires Python 3.6 or higher!")
+    print("You are using Python {}.{}".format(sys.version_info.major, sys.version_info.minor))
+    sys.exit(1)
+
+if len(sys.argv) < 2:
+    print("URL not specified")
+    print("Usage -> python driver.py URL")
+    sys.exit(1)
+
+print("""
+
+8888888 88888888888 8888888b.  8888888b.   .d88888b.     88888888888 888     888 
+  888       888     888   Y88b 888   Y88b d88P" "Y88b        888     888     888 
+  888       888     888    888 888    888 888     888        888     888     888 
+  888       888     888   d88P 888   d88P 888     888        888     Y88b   d88P 
+  888       888     8888888P"  8888888P"  888     888        888      Y88b d88P  
+  888       888     888        888 T88b   888     888        888       Y88o88P   
+  888       888     888        888  T88b  Y88b. .d88P d8b    888        Y888P    
+8888888     888     888        888   T88b  "Y88888P"  Y8P    888         Y8P     
+                                                                                                                                                                                                                                                                                                                                                                  
+""")
+
+url = sys.argv[1]
+
+user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 ' \
+             'Safari/537.36 '
+headers = {
+    'User-Agent': user_agent
+}
+
+chrome_options = Options()
+chrome_options.add_argument(f'user-agent={user_agent}')
+chrome_options.add_argument('--disable-extensions')
+chrome_options.add_argument('--no-sandbox')
+chrome_options.add_argument('--disable-gpu')
+chrome_options.add_argument('--disable-popup-blocking')
+chrome_options.add_argument('--disable-logging')
+chrome_options.add_argument('--allow-running-insecure-content')
+chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])
+chrome_options.add_argument('--headless')
+
+if platform.system() == 'Linux':
+    if os.path.exists("/usr/bin/chromedriver"):
+        browser = webdriver.Chrome(executable_path="/usr/bin/chromedriver",
+                                   options=chrome_options)
+    else:
+        print("Chromedriver not found; expected path '/usr/bin/chromedriver'")
+        exit(1)
+else:
+    if os.path.exists("C:/ChromeDriver/chromedriver.exe"):
+        browser = webdriver.Chrome(executable_path="C:/ChromeDriver/chromedriver.exe",
+                                   options=chrome_options)
+    else:
+        print("Chromedriver not found; expected path 'C:/ChromeDriver/chromedriver.exe'")
+        exit(1)
+
+browser.set_page_load_timeout(10000)
+browser.maximize_window()
+browser.get("https://app.itpro.tv/login/")
+browser.get(url)
+
+print("Executing for " + url)
+time.sleep(5)
+print('* Trying to log in ... *')
+
+try:
+    baker.bake()
+    with open('cookies.json') as cookie_file:
+        cookies = json.load(cookie_file)
+    for cookie in cookies:
+        browser.add_cookie(cookie)
+except Exception as e:
+    browser.close()
+    browser.quit()
+    raise e
+
+browser.get(url)
+time.sleep(5)
+html = browser.page_source
+parsed_html = BeautifulSoup(html, 'html5lib')
+
+if parsed_html.find(id='topAccountNav') is None:
+    browser.close()
+    browser.quit()
+    raise Exception(' ** Failed to log in. Please renew the "cookies.txt" file. **')
+else:
+    print(" - Logged in!")
+
+urls = []
+lessons = []
+lesson_urls = []
+lesson_names = []
+
+course_name = re.sub('[?/:\n\t]', '', parsed_html.find('h3', attrs={'class', 'mb-0'}).text)
+print("Course name detected as " + course_name)
+
+browser.execute_script("return document.querySelectorAll('.notCurrentTopic').forEach(e => e.click())")
+
+time.sleep(10)
+
+parsed_html = BeautifulSoup(browser.page_source, 'html5lib')
+
+lesson_links = parsed_html.find_all('a', attrs={'class', 'episodeLink'})
+
+for lesson_link in lesson_links:
+    lesson_urls.append('https://app.itpro.tv' + lesson_link['href'])
+
+print("Enumerating links and sources ...")
+
+
+for index, lesson_url in enumerate(lesson_urls):
+    browser.get(lesson_url)
+    temp_html = browser.page_source
+    temp_parsed_html = BeautifulSoup(temp_html, 'html5lib')
+    time.sleep(10)
+    while True:
+        try:
+            lessons.append(browser.execute_script("return document.getElementsByTagName('video')[0].src"))
+            lesson_names.append(browser.execute_script("return document.querySelector('#courseContentLayer > div > div > div.d-flex.flex-column.flex-lg-row > div.flex-grow-1 > h1').innerText"))
+        except Exception:
+            continue
+        break
+    print(f'Progress: {(index + 1)} of {len(lesson_urls)}', end='\r')
+
+directory = os.getcwd() + os.path.sep + re.sub('[?/:\n]', '', course_name)
+if not os.path.exists(directory):
+    os.mkdir(directory)
+
+browser.close()
+browser.quit()
+print("Commencing download ...")
+
+for index, lesson in enumerate(lessons, start=0):
+    try:
+        print(str.format('Downloading: {} ...', lesson_names[index]))
+        r = requests.get(lessons[index], headers=headers, stream=True)
+        video_file = directory + os.path.sep + str(index + 1) + ". " + re.sub('[?/:\n]', '',
+                                                                              lesson_names[index]) + ".mp4"
+        total = r.headers.get('content-length')
+        if not os.path.exists(video_file) or os.stat(video_file).st_size != int(total):
+            with open(video_file, 'wb') as f:
+                if total is None:
+                    f.write(r.content)
+                else:
+                    total_size = int(total, 0)
+                    block_size = 1024
+                    t = tqdm(total=total_size, unit='iB', unit_scale=True)
+                    for data in r.iter_content(block_size):
+                        t.update(len(data))
+                        f.write(data)
+                    t.close()
+        else:
+            print(" - {} exists. Skipping download.".format(lesson_names[index]))
+    except Exception as e:
+        print(e)
+
+print('\n\033[92m** Downloads completed! **\033[0m\n')
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,5 @@
+beautifulsoup4
+requests
+tqdm
+selenium
+html5lib