Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
RahulShaw committed Jan 9, 2021
1 parent 40445d7 commit 09e9fce
Show file tree
Hide file tree
Showing 5 changed files with 250 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.git
41 changes: 41 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Download videos from ITPROTV / ITPROTV-DL

###### Python script to download videos from your ITPROTV account for offline viewing

![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg)

![ITPROTV-DL](https://i.imgur.com/iW2ilOD.png)

### Requirements
- Python 3.6 and above
- BeautifulSoup - https://pypi.org/project/beautifulsoup4/
- requests - https://pypi.org/project/requests/
- html5lib - https://pypi.org/project/html5lib/
- Selenium - https://pypi.org/project/selenium/
- tqdm - https://pypi.org/project/tqdm/
- ChromeDriver - http://chromedriver.chromium.org/
- Get cookies.txt - https://bit.ly/GoogleChrome-GetCookiesTxt
- Active subscription on itpro.tv

### Usage

> Clone the repo
> Run `pip install -r requirements.txt`
> Login to itpro.tv and visit the course page e.g. https://app.itpro.tv/course/mta-security-fundamentals-98367-2018/ and with the `Get cookies.txt` extension installed, click on the icon of the extension and click on `Export`.
![Get cookies.txt](https://i.imgur.com/6QkV9RC.png)

> Rename the downloaded `itpro.tv_cookies.txt` file to `cookies.txt` and copy it to root of the cloned repo. Make sure that the name of the file is ``cookies.txt``. Repeat when you encounter an exception while downloading the videos (assuming you have an active subscription).
> course_link e.g. https://app.itpro.tv/course/mta-security-fundamentals-98367-2018/
``` python
>>> python driver.py course_link
```

I'd be gratified to have your support -

[<img src="https://i.imgur.com/ngduQd7.png">](https://www.buymeacoffee.com/RahulShaw)

28 changes: 28 additions & 0 deletions baker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import json
import os


def bake():
cookies = []

if os.path.exists(os.getcwd() + os.path.sep + "cookies.txt"):
with open('cookies.txt', 'r') as f:
lines = f.readlines()
else:
raise Exception('The "cookies.txt" file was not found')

for line in lines:
if line.startswith('#') or (len(line.strip()) == 0):
pass
else:
line = line.replace('\n', '').split('\t')
if line.__len__() == 7:
cookie = dict(domain=line[0].strip(), flag=bool((line[1].strip() == 'TRUE')), path=line[2].strip(),
secure=bool((line[3].strip() == 'TRUE')), expiration=line[4].strip(), name=line[5].strip(), value=line[6].strip())
cookies.append(cookie)
else:
raise Exception('Malformed cookies.txt file')

with open('cookies.json', 'w') as f:
f.write(json.dumps(cookies, indent=4))
print("Cookies baked!")
175 changes: 175 additions & 0 deletions driver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
from __future__ import unicode_literals

import json
import os
import platform
import re
import sys
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from tqdm import tqdm

import baker

if not (sys.version_info.major == 3 and sys.version_info.minor >= 6):
print("This script requires Python 3.6 or higher!")
print("You are using Python {}.{}".format(sys.version_info.major, sys.version_info.minor))
sys.exit(1)

if len(sys.argv) < 2:
print("URL not specified")
print("Usage -> python driver.py URL")
sys.exit(1)

print("""
8888888 88888888888 8888888b. 8888888b. .d88888b. 88888888888 888 888
888 888 888 Y88b 888 Y88b d88P" "Y88b 888 888 888
888 888 888 888 888 888 888 888 888 888 888
888 888 888 d88P 888 d88P 888 888 888 Y88b d88P
888 888 8888888P" 8888888P" 888 888 888 Y88b d88P
888 888 888 888 T88b 888 888 888 Y88o88P
888 888 888 888 T88b Y88b. .d88P d8b 888 Y888P
8888888 888 888 888 T88b "Y88888P" Y8P 888 Y8P
""")

url = sys.argv[1]

user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 ' \
'Safari/537.36 '
headers = {
'User-Agent': user_agent
}

chrome_options = Options()
chrome_options.add_argument(f'user-agent={user_agent}')
chrome_options.add_argument('--disable-extensions')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--disable-popup-blocking')
chrome_options.add_argument('--disable-logging')
chrome_options.add_argument('--allow-running-insecure-content')
chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])
chrome_options.add_argument('--headless')

if platform.system() == 'Linux':
if os.path.exists("/usr/bin/chromedriver"):
browser = webdriver.Chrome(executable_path="/usr/bin/chromedriver",
options=chrome_options)
else:
print("Chromedriver not found; expected path '/usr/bin/chromedriver'")
exit(1)
else:
if os.path.exists("C:/ChromeDriver/chromedriver.exe"):
browser = webdriver.Chrome(executable_path="C:/ChromeDriver/chromedriver.exe",
options=chrome_options)
else:
print("Chromedriver not found; expected path 'C:/ChromeDriver/chromedriver.exe'")
exit(1)

browser.set_page_load_timeout(10000)
browser.maximize_window()
browser.get("https://app.itpro.tv/login/")
browser.get(url)

print("Executing for " + url)
time.sleep(5)
print('* Trying to log in ... *')

try:
baker.bake()
with open('cookies.json') as cookie_file:
cookies = json.load(cookie_file)
for cookie in cookies:
browser.add_cookie(cookie)
except Exception as e:
browser.close()
browser.quit()
raise e

browser.get(url)
time.sleep(5)
html = browser.page_source
parsed_html = BeautifulSoup(html, 'html5lib')

if parsed_html.find(id='topAccountNav') is None:
browser.close()
browser.quit()
raise Exception(' ** Failed to log in. Please renew the "cookies.txt" file. **')
else:
print(" - Logged in!")

urls = []
lessons = []
lesson_urls = []
lesson_names = []

course_name = re.sub('[?/:\n\t]', '', parsed_html.find('h3', attrs={'class', 'mb-0'}).text)
print("Course name detected as " + course_name)

browser.execute_script("return document.querySelectorAll('.notCurrentTopic').forEach(e => e.click())")

time.sleep(10)

parsed_html = BeautifulSoup(browser.page_source, 'html5lib')

lesson_links = parsed_html.find_all('a', attrs={'class', 'episodeLink'})

for lesson_link in lesson_links:
lesson_urls.append('https://app.itpro.tv' + lesson_link['href'])

print("Enumerating links and sources ...")


for index, lesson_url in enumerate(lesson_urls):
browser.get(lesson_url)
temp_html = browser.page_source
temp_parsed_html = BeautifulSoup(temp_html, 'html5lib')
time.sleep(10)
while True:
try:
lessons.append(browser.execute_script("return document.getElementsByTagName('video')[0].src"))
lesson_names.append(browser.execute_script("return document.querySelector('#courseContentLayer > div > div > div.d-flex.flex-column.flex-lg-row > div.flex-grow-1 > h1').innerText"))
except Exception:
continue
break
print(f'Progress: {(index + 1)} of {len(lesson_urls)}', end='\r')

directory = os.getcwd() + os.path.sep + re.sub('[?/:\n]', '', course_name)
if not os.path.exists(directory):
os.mkdir(directory)

browser.close()
browser.quit()
print("Commencing download ...")

for index, lesson in enumerate(lessons, start=0):
try:
print(str.format('Downloading: {} ...', lesson_names[index]))
r = requests.get(lessons[index], headers=headers, stream=True)
video_file = directory + os.path.sep + str(index + 1) + ". " + re.sub('[?/:\n]', '',
lesson_names[index]) + ".mp4"
total = r.headers.get('content-length')
if not os.path.exists(video_file) or os.stat(video_file).st_size != int(total):
with open(video_file, 'wb') as f:
if total is None:
f.write(r.content)
else:
total_size = int(total, 0)
block_size = 1024
t = tqdm(total=total_size, unit='iB', unit_scale=True)
for data in r.iter_content(block_size):
t.update(len(data))
f.write(data)
t.close()
else:
print(" - {} exists. Skipping download.".format(lesson_names[index]))
except Exception as e:
print(e)

print('\n\033[92m** Downloads completed! **\033[0m\n')
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
beautifulsoup4
requests
tqdm
selenium
html5lib

0 comments on commit 09e9fce

Please sign in to comment.