This repository was archived by the owner on Sep 24, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathextracted_archive_display.py
More file actions
executable file
·62 lines (46 loc) · 1.88 KB
/
extracted_archive_display.py
File metadata and controls
executable file
·62 lines (46 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python3
"""Test that an extracted WebArchive displays correctly.
This complements the webarchive module's unit tests, which confirm
that individual components work correctly, by demonstrating that
they also collectively function as intended.
"""
import os
import sys
import tempfile
import webbrowser
import textwrap
import time
# Absolute, symlink-resolved path to this source file, plus its directory
# and that directory's parent (we need the parent for the webarchive import)
SOURCE_PATH = os.path.realpath(__file__)
SOURCE_DIR = os.path.dirname(SOURCE_PATH)
SOURCE_PARENT = os.path.dirname(SOURCE_DIR)

# Import our local copy of the webarchive module: SOURCE_PARENT is put at
# the front of sys.path first, so it is searched before any other entry.
# The order of these two statements matters.
sys.path.insert(0, SOURCE_PARENT)
import webarchive

# Directory containing sample data (<SOURCE_PARENT>/webarchive/sample_data)
SAMPLE_DATA_DIR = os.path.join(SOURCE_PARENT, "webarchive", "sample_data")

# Path to our sample archive
# Source: https://en.wikipedia.org/wiki/Main_Page (CC BY-SA)
SAMPLE_ARCHIVE_PATH = os.path.join(SAMPLE_DATA_DIR, "Wikipedia.webarchive")
def run_test():
    """Extract the sample archive and open the result in a web browser.

    The sample WebArchive is extracted to an HTML file inside a temporary
    directory, the extracted page is opened with the default web browser,
    and a message describing the expected rendering is printed so that a
    person can confirm the result by eye. The function then sleeps for ten
    seconds before the temporary directory is removed.

    Raises:
        AssertionError: If the output file already exists before the
            extraction, or is not a regular file afterwards (only when
            assertions are enabled, i.e. Python is not run with -O).
    """
    # Imported locally so that this function does not rely on a new
    # module-level import.
    from pathlib import Path

    with tempfile.TemporaryDirectory() as tmp_dir:
        output_path = os.path.join(tmp_dir, "Wikipedia.html")
        assert not os.path.exists(output_path)

        # Load our sample archive
        with webarchive.open(SAMPLE_ARCHIVE_PATH) as archive:
            # Extract the archive, and assert that it succeeded
            archive.extract(output_path)
            assert os.path.isfile(output_path)

        # Open the converted page.
        # webbrowser.open() takes a URL; handing it a bare filename is
        # documented as neither supported nor portable, so convert the
        # path to a file:// URI first.
        webbrowser.open(Path(output_path).resolve().as_uri())

        # Tell the user what to expect if the page rendered correctly
        print(textwrap.dedent("""\
            If the archive extracted correctly, your web browser should
            display the main page of the English Wikipedia with a featured
            article on P. G. Wodehouse."""))

        # Wait a few seconds for the browser to open and finish rendering
        # before we clean up the temporary directory
        time.sleep(10)
# Run the display test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_test()