-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
250 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
.git |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Download videos from ITPROTV / ITPROTV-DL | ||
|
||
###### Python script to download videos from your ITPROTV account for offline viewing | ||
|
||
 | ||
|
||
 | ||
|
||
### Requirements | ||
- Python 3.6 and above | ||
- BeautifulSoup - https://pypi.org/project/beautifulsoup4/ | ||
- requests - https://pypi.org/project/requests/ | ||
- html5lib - https://pypi.org/project/html5lib/ | ||
- Selenium - https://pypi.org/project/selenium/ | ||
- tqdm - https://pypi.org/project/tqdm/ | ||
- ChromeDriver - http://chromedriver.chromium.org/ | ||
- Get cookies.txt - https://bit.ly/GoogleChrome-GetCookiesTxt | ||
- Active subscription on itpro.tv | ||
|
||
### Usage | ||
|
||
> Clone the repo | ||
> Run `pip install -r requirements.txt` | ||
> Log in to itpro.tv and visit the course page (e.g. https://app.itpro.tv/course/mta-security-fundamentals-98367-2018/). With the `Get cookies.txt` extension installed, click the extension's icon and then click `Export`. | ||
 | ||
|
||
> Rename the downloaded `itpro.tv_cookies.txt` file to `cookies.txt` and copy it to the root of the cloned repo. The file must be named exactly `cookies.txt`. Repeat this export step whenever you encounter an exception while downloading the videos (assuming you have an active subscription). | ||
> course_link e.g. https://app.itpro.tv/course/mta-security-fundamentals-98367-2018/ | ||
``` shell | ||
python driver.py course_link | ||
``` | ||
|
||
I'd be gratified to have your support - | ||
|
||
[<img src="https://i.imgur.com/ngduQd7.png">](https://www.buymeacoffee.com/RahulShaw) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import json | ||
import os | ||
|
||
|
||
def bake():
    """Convert a Netscape-format ``cookies.txt`` into ``cookies.json``.

    Reads ``cookies.txt`` from the current working directory, parses every
    tab-separated cookie record and writes the result to ``cookies.json``
    (also in the current working directory) as a JSON list of objects with
    the keys ``domain``, ``flag``, ``path``, ``secure``, ``expiration``,
    ``name`` and ``value``.

    Blank lines and ``#`` comment lines are ignored.  Lines starting with
    ``#HttpOnly_`` are NOT comments: in the Netscape cookie file format that
    prefix marks an HttpOnly cookie, so the prefix is removed and the record
    is parsed like any other.

    Raises:
        FileNotFoundError: if ``cookies.txt`` does not exist.
        ValueError: if a cookie record does not have exactly seven fields.
    """
    http_only_prefix = '#HttpOnly_'
    cookies = []

    try:
        cookie_file = open('cookies.txt', 'r')
    except FileNotFoundError:
        raise FileNotFoundError('The "cookies.txt" file was not found') from None

    with cookie_file:
        for line in cookie_file:
            if not line.strip():
                continue
            if line.startswith(http_only_prefix):
                # HttpOnly cookie: drop the marker, keep the record.
                line = line[len(http_only_prefix):]
            elif line.startswith('#'):
                continue

            # Only the line terminator is removed before splitting: stripping
            # the whole line would swallow a trailing tab and so lose a
            # legitimately empty cookie value.
            fields = [field.strip() for field in line.rstrip('\r\n').split('\t')]
            if len(fields) != 7:
                raise ValueError('Malformed cookies.txt file')

            domain, flag, path, secure, expiration, name, value = fields
            cookies.append(dict(domain=domain, flag=(flag == 'TRUE'), path=path,
                                secure=(secure == 'TRUE'), expiration=expiration,
                                name=name, value=value))

    with open('cookies.json', 'w') as f:
        json.dump(cookies, f, indent=4)
    print("Cookies baked!")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
from __future__ import unicode_literals | ||
|
||
import json | ||
import os | ||
import platform | ||
import re | ||
import sys | ||
import time | ||
|
||
import requests | ||
from bs4 import BeautifulSoup | ||
from selenium import webdriver | ||
from selenium.webdriver.chrome.options import Options | ||
from tqdm import tqdm | ||
|
||
import baker | ||
|
||
# Preflight: refuse to run on anything other than Python 3.6+ and require the
# course URL as the first command-line argument.
running_version = sys.version_info
if running_version.major != 3 or running_version.minor < 6:
    print("This script requires Python 3.6 or higher!")
    print("You are using Python {}.{}".format(running_version.major, running_version.minor))
    sys.exit(1)

if len(sys.argv) < 2:
    for usage_line in ("URL not specified", "Usage -> python driver.py URL"):
        print(usage_line)
    sys.exit(1)
|
||
print(""" | ||
8888888 88888888888 8888888b. 8888888b. .d88888b. 88888888888 888 888 | ||
888 888 888 Y88b 888 Y88b d88P" "Y88b 888 888 888 | ||
888 888 888 888 888 888 888 888 888 888 888 | ||
888 888 888 d88P 888 d88P 888 888 888 Y88b d88P | ||
888 888 8888888P" 8888888P" 888 888 888 Y88b d88P | ||
888 888 888 888 T88b 888 888 888 Y88o88P | ||
888 888 888 888 T88b Y88b. .d88P d8b 888 Y888P | ||
8888888 888 888 888 T88b "Y88888P" Y8P 888 Y8P | ||
""") | ||
|
||
# Course URL supplied on the command line.
url = sys.argv[1]

# The same user agent is used for the headless browser and for the plain
# HTTP downloads performed later with `requests`.
user_agent = (
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 '
    'Safari/537.36 '
)
headers = {'User-Agent': user_agent}

# Command-line switches for the headless Chrome instance, in the order they
# are registered.
chrome_options = Options()
for chrome_switch in (
    f'user-agent={user_agent}',
    '--disable-extensions',
    '--no-sandbox',
    '--disable-gpu',
    '--disable-popup-blocking',
    '--disable-logging',
    '--allow-running-insecure-content',
    '--headless',
):
    chrome_options.add_argument(chrome_switch)
chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])
|
||
# Locate ChromeDriver at the platform's expected install path and start the
# browser.  Anything that is not Linux is treated as Windows, as before.
if platform.system() == 'Linux':
    chromedriver_path = "/usr/bin/chromedriver"
else:
    chromedriver_path = "C:/ChromeDriver/chromedriver.exe"

if not os.path.exists(chromedriver_path):
    print("Chromedriver not found; expected path '{}'".format(chromedriver_path))
    # sys.exit() rather than the bare exit(): the latter is an interactive
    # helper injected by the `site` module and is not guaranteed to exist
    # (e.g. `python -S` or frozen builds).  The rest of this script already
    # uses sys.exit().
    sys.exit(1)

browser = webdriver.Chrome(executable_path=chromedriver_path,
                           options=chrome_options)
|
||
# Open the site once so that cookies for its domain can be attached, then
# inject the exported cookies and reload the course page as a logged-in user.
browser.set_page_load_timeout(10000)
browser.maximize_window()
for landing_page in ("https://app.itpro.tv/login/", url):
    browser.get(landing_page)

print("Executing for " + url)
time.sleep(5)
print('* Trying to log in ... *')

try:
    # Convert cookies.txt -> cookies.json, then hand every cookie to Chrome.
    baker.bake()
    with open('cookies.json') as cookie_file:
        cookies = json.load(cookie_file)
    for cookie in cookies:
        browser.add_cookie(cookie)
except Exception as e:
    # Do not leave a headless Chrome behind when the cookies are unusable.
    browser.close()
    browser.quit()
    raise e

browser.get(url)
time.sleep(5)
html = browser.page_source
parsed_html = BeautifulSoup(html, 'html5lib')

# The account navigation element is used as the marker for a logged-in page.
logged_in = parsed_html.find(id='topAccountNav') is not None
if not logged_in:
    browser.close()
    browser.quit()
    raise Exception(' ** Failed to log in. Please renew the "cookies.txt" file. **')
print(" - Logged in!")
|
||
# Collect the course title, every lesson page URL and, for each lesson, the
# direct video source URL and the lesson title.
urls = []
lessons = []        # video source URLs, one per lesson
lesson_urls = []    # lesson page URLs found on the course page
lesson_names = []   # lesson titles, index-aligned with `lessons`

# `attrs` must be a dict.  The original passed the set {'class', 'mb-0'},
# which Beautiful Soup treats as "class is 'class' OR 'mb-0'" and therefore
# only worked by accident.
course_heading = parsed_html.find('h3', attrs={'class': 'mb-0'})
if course_heading is None:
    # Fail with a clear message (and without leaking the browser) instead of
    # an AttributeError on `None.text`.
    browser.close()
    browser.quit()
    raise Exception('Could not find the course title on the page; the page layout may have changed.')
course_name = re.sub('[?/:\n\t]', '', course_heading.text)
print("Course name detected as " + course_name)

# Click every '.notCurrentTopic' element (presumably this expands collapsed
# topics so that all episode links are rendered) and give the page time to
# update before re-reading it.
browser.execute_script("return document.querySelectorAll('.notCurrentTopic').forEach(e => e.click())")
time.sleep(10)

parsed_html = BeautifulSoup(browser.page_source, 'html5lib')
lesson_links = parsed_html.find_all('a', attrs={'class': 'episodeLink'})
lesson_urls.extend('https://app.itpro.tv' + lesson_link['href'] for lesson_link in lesson_links)

print("Enumerating links and sources ...")

_VIDEO_SRC_JS = "return document.getElementsByTagName('video')[0].src"
_LESSON_TITLE_JS = "return document.querySelector('#courseContentLayer > div > div > div.d-flex.flex-column.flex-lg-row > div.flex-grow-1 > h1').innerText"
_MAX_ATTEMPTS = 30   # per lesson
_RETRY_DELAY = 2     # seconds between attempts

for index, lesson_url in enumerate(lesson_urls):
    browser.get(lesson_url)
    time.sleep(10)
    for attempt in range(1, _MAX_ATTEMPTS + 1):
        try:
            # Read BOTH values before recording either one: appending the
            # source and then failing on the title would, on retry, leave
            # `lessons` and `lesson_names` out of step.
            video_src = browser.execute_script(_VIDEO_SRC_JS)
            lesson_title = browser.execute_script(_LESSON_TITLE_JS)
        except Exception:
            if attempt == _MAX_ATTEMPTS:
                # The page never became ready (or the session died); stop
                # instead of spinning forever.
                browser.close()
                browser.quit()
                raise
            time.sleep(_RETRY_DELAY)
        else:
            lessons.append(video_src)
            lesson_names.append(lesson_title)
            break
    print(f'Progress: {(index + 1)} of {len(lesson_urls)}', end='\r')
|
||
# Download every enumerated lesson into a directory named after the course.
directory = os.path.join(os.getcwd(), re.sub('[?/:\n]', '', course_name))
os.makedirs(directory, exist_ok=True)

# The browser is no longer needed; the videos are fetched with `requests`.
browser.close()
browser.quit()
print("Commencing download ...")

for index, (lesson, lesson_name) in enumerate(zip(lessons, lesson_names)):
    try:
        print(str.format('Downloading: {} ...', lesson_name))
        video_file = os.path.join(
            directory,
            str(index + 1) + ". " + re.sub('[?/:\n]', '', lesson_name) + ".mp4")

        # The context manager releases the streamed connection even when the
        # download is skipped or fails part-way.
        with requests.get(lesson, headers=headers, stream=True) as r:
            # Do not save an HTTP error page under a .mp4 name.
            r.raise_for_status()

            total = r.headers.get('content-length')
            # The header may be absent; int(None) used to raise here whenever
            # the target file already existed.
            total_size = int(total) if total is not None else None

            # Skip only when the size is known and the file on disk is
            # complete; otherwise (re)download it.
            if (total_size is not None and os.path.exists(video_file)
                    and os.path.getsize(video_file) == total_size):
                print(" - {} exists. Skipping download.".format(lesson_name))
                continue

            # Stream in small chunks in both cases so a whole video is never
            # held in memory; tqdm accepts total=None for an unknown size.
            with open(video_file, 'wb') as f, \
                    tqdm(total=total_size, unit='iB', unit_scale=True) as t:
                for data in r.iter_content(1024):
                    t.update(len(data))
                    f.write(data)
    except Exception as e:
        # Best effort: report the failure and carry on with the next lesson.
        print(e)

print('\n\033[92m** Downloads completed! **\033[0m\n')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
beautifulsoup4 | ||
requests | ||
tqdm | ||
selenium | ||
html5lib |