Skip to content

Commit a011ba9

Browse files
committed
add more ways to configure web browser from command-line
* option to use system browser
* option to add custom browser command-line arguments
* option to ignore HTTPS errors
1 parent 955bbc9 commit a011ba9

File tree

5 files changed: 73 additions & 2 deletions

docs/authentication.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,9 @@ Options:
4848
-b, --browser [chromium|firefox|webkit|chrome|chrome-beta]
4949
Which browser to use
5050
--user-agent TEXT User-Agent header to use
51+
--system-browser Use web browser installed by the system
52+
--browser-args TEXT Browser command-line arguments
53+
--ignore-https-errors Ignore HTTPS errors
5154
--devtools Open browser DevTools
5255
--help Show this message and exit.
5356
```

docs/javascript.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,9 @@ Options:
153153
Which browser to use
154154
--user-agent TEXT User-Agent header to use
155155
--reduced-motion Emulate 'prefers-reduced-motion' media feature
156+
--system-browser Use web browser installed by the system
157+
--browser-args TEXT Browser command-line arguments
158+
--ignore-https-errors Ignore HTTPS errors
156159
--help Show this message and exit.
157160
```
158161
<!-- [[[end]]] -->

docs/multi.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,9 @@ Options:
139139
Which browser to use
140140
--user-agent TEXT User-Agent header to use
141141
--reduced-motion Emulate 'prefers-reduced-motion' media feature
142+
--system-browser Use web browser installed by the system
143+
--browser-args TEXT Browser command-line arguments
144+
--ignore-https-errors Ignore HTTPS errors
142145
--help Show this message and exit.
143146
```
144147
<!-- [[[end]]] -->

docs/screenshots.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -268,6 +268,9 @@ Options:
268268
Which browser to use
269269
--user-agent TEXT User-Agent header to use
270270
--reduced-motion Emulate 'prefers-reduced-motion' media feature
271+
--system-browser Use web browser installed by the system
272+
--browser-args TEXT Browser command-line arguments
273+
--ignore-https-errors Ignore HTTPS errors
271274
--help Show this message and exit.
272275
```
273276
<!-- [[[end]]] -->

shot_scraper/cli.py

Lines changed: 61 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
import textwrap
1111
import time
1212
import yaml
13+
from distutils import spawn
1314

1415
from shot_scraper.utils import filename_for_url, url_or_file_path
1516

@@ -40,6 +41,25 @@ def reduced_motion_option(fn):
4041
)(fn)
4142
return fn
4243

44+
def system_browser_option(fn):
45+
click.option(
46+
"--system-browser",
47+
is_flag=True,
48+
help="Use web browser installed by the system"
49+
)(fn)
50+
return fn
51+
52+
def browser_args_option(fn):
53+
click.option("--browser-args", help="Browser command-line arguments")(fn)
54+
return fn
55+
56+
def ignore_https_errors_option(fn):
57+
click.option(
58+
"--ignore-https-errors",
59+
is_flag=True,
60+
help="Ignore HTTPS errors"
61+
)(fn)
62+
return fn
4363

4464
@click.group(
4565
cls=DefaultGroup,
@@ -142,6 +162,9 @@ def cli():
142162
@browser_option
143163
@user_agent_option
144164
@reduced_motion_option
165+
@system_browser_option
166+
@browser_args_option
167+
@ignore_https_errors_option
145168
def shot(
146169
url,
147170
auth,
@@ -165,6 +188,9 @@ def shot(
165188
browser,
166189
user_agent,
167190
reduced_motion,
191+
system_browser,
192+
browser_args,
193+
ignore_https_errors,
168194
):
169195
"""
170196
Take a single screenshot of a page or portion of a page.
@@ -224,6 +250,9 @@ def shot(
224250
user_agent=user_agent,
225251
timeout=timeout,
226252
reduced_motion=reduced_motion,
253+
system_browser=system_browser,
254+
browser_args=browser_args,
255+
ignore_https_errors=ignore_https_errors,
227256
)
228257
if interactive or devtools:
229258
use_existing_page = True
@@ -267,8 +296,15 @@ def _browser_context(
267296
user_agent=None,
268297
timeout=None,
269298
reduced_motion=False,
299+
system_browser=False,
300+
browser_args=None,
301+
ignore_https_errors=None,
270302
):
271303
browser_kwargs = dict(headless=not interactive, devtools=devtools)
304+
if system_browser:
305+
browser_kwargs['executable_path'] = spawn.find_executable(browser)
306+
if browser_args:
307+
browser_kwargs["args"] = browser_args.split(' ')
272308
if browser == "chromium":
273309
browser_obj = p.chromium.launch(**browser_kwargs)
274310
elif browser == "firefox":
@@ -287,6 +323,8 @@ def _browser_context(
287323
context_args["reduced_motion"] = "reduce"
288324
if user_agent is not None:
289325
context_args["user_agent"] = user_agent
326+
if ignore_https_errors is not None:
327+
context_args["ignore_https_errors"] = ignore_https_errors
290328
context = browser_obj.new_context(**context_args)
291329
if timeout:
292330
context.set_default_timeout(timeout)
@@ -318,6 +356,9 @@ def _browser_context(
318356
@browser_option
319357
@user_agent_option
320358
@reduced_motion_option
359+
@system_browser_option
360+
@browser_args_option
361+
@ignore_https_errors_option
321362
def multi(
322363
config,
323364
auth,
@@ -328,6 +369,9 @@ def multi(
328369
browser,
329370
user_agent,
330371
reduced_motion,
372+
system_browser,
373+
browser_args,
374+
ignore_https_errors,
331375
):
332376
"""
333377
Take multiple screenshots, defined by a YAML file
@@ -358,6 +402,9 @@ def multi(
358402
user_agent=user_agent,
359403
timeout=timeout,
360404
reduced_motion=reduced_motion,
405+
system_browser=system_browser,
406+
browser_args=browser_args,
407+
ignore_https_errors=ignore_https_errors,
361408
)
362409
for shot in shots:
363410
if (
@@ -444,8 +491,11 @@ def accessibility(url, auth, output, javascript, timeout):
444491
@browser_option
445492
@user_agent_option
446493
@reduced_motion_option
494+
@system_browser_option
495+
@browser_args_option
496+
@ignore_https_errors_option
447497
def javascript(
448-
url, javascript, input, auth, output, browser, user_agent, reduced_motion
498+
url, javascript, input, auth, output, browser, user_agent, reduced_motion, system_browser, browser_args, ignore_https_errors,
449499
):
450500
"""
451501
Execute JavaScript against the page and return the result as JSON
@@ -482,6 +532,9 @@ def javascript(
482532
browser=browser,
483533
user_agent=user_agent,
484534
reduced_motion=reduced_motion,
535+
system_browser=system_browser,
536+
browser_args=browser_args,
537+
ignore_https_errors=ignore_https_errors,
485538
)
486539
page = context.new_page()
487540
page.goto(url)
@@ -640,8 +693,11 @@ def install(browser):
640693
)
641694
@browser_option
642695
@user_agent_option
696+
@system_browser_option
697+
@browser_args_option
698+
@ignore_https_errors_option
643699
@click.option("--devtools", is_flag=True, help="Open browser DevTools")
644-
def auth(url, context_file, browser, user_agent, devtools):
700+
def auth(url, context_file, browser, user_agent, devtools, system_browser, browser_args, ignore_https_errors):
645701
"""
646702
Open a browser so user can manually authenticate with the specified site,
647703
then save the resulting authentication context to a file.
@@ -658,6 +714,9 @@ def auth(url, context_file, browser, user_agent, devtools):
658714
devtools=devtools,
659715
browser=browser,
660716
user_agent=user_agent,
717+
system_browser=system_browser,
718+
browser_args=browser_args,
719+
ignore_https_errors=ignore_https_errors,
661720
)
662721
context = browser_obj.new_context()
663722
page = context.new_page()

0 commit comments

Comments
 (0)