Skip to content

Commit

Permalink
fix: actually use the updated user agent
Browse files Browse the repository at this point in the history
docs: some clarity about the state of the project
  • Loading branch information
Mattwmaster58 committed Nov 16, 2024
1 parent 30e4ce6 commit c62fe03
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 25 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

Transplanted from [puppeteer-extra-plugin-stealth](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth), with some improvements. Don't expect this to bypass anything but the simplest of bot detection methods. Consider this a proof-of-concept starting point.

This is a WIP fork, with the goal of replacing the out-of-date upstream. See the [changelog](./README.md).
This is a WIP fork. I've merged some of the outstanding PRs and added some features with the goal of replacing the out-of-date upstream. See the [changelog](./README.md).

## Install

Install the latest stable release:
```
$ pip install git+https://github.com/Mattwmaster58/playwright_stealth@rc2
$ pip install git+https://github.com/Mattwmaster58/playwright_stealth@rc3
```
or straight from `main`
```
Expand All @@ -27,6 +27,7 @@ from playwright_stealth import Stealth, ALL_EVASIONS_DISABLED_KWARGS
async def main():
# This is the recommended usage. All pages created will have stealth applied:
async with Stealth().use_async(async_playwright()) as p:
# or, to hook every browser launched from this context: stealth.hook_playwright_context(p)
browser = await p.chromium.launch()
page = await browser.new_page()
print("from new_page: ", await page.evaluate("navigator.webdriver"))
Expand All @@ -41,8 +42,8 @@ async def main():
init_scripts_only=True
)
async with async_playwright() as p:
# or, to hook every browser launched from this context: stealth.hook_playwright_context(p)
browser = await p.chromium.launch()
# this usage isn't recommended: certain evasions will work worse, and some can't be applied at all
context = await browser.new_context()
await stealth.apply_stealth_async(context)
page_1 = await context.new_page()
Expand Down
44 changes: 23 additions & 21 deletions playwright_stealth/stealth.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,10 +301,12 @@ def _generate_hooked_new_page(self, new_page_method: Callable) -> Callable:
*args and **kwargs even though these methods may not take any number of arguments,
we want to preserve accurate stack traces when caller passes args improperly
"""
browser_instance = new_page_method.__self__
USER_AGENT_OVERRIDE_PIGGYBACK_KEY = "_stealth_user_agent"
SEC_CH_UA_OVERRIDE_PIGGYBACK_KEY = "_stealth_sec_ch_ua"

browser_instance = new_page_method.__self__
is_chromium = browser_instance.browser_type.name == "chromium"

async def get_user_agent_and_sec_ch_ua_async(page_method: Callable) -> Tuple[str, str]:
"""
If there's no override, it's Chrome, and we haven't cached a UA value prior, we need to come up
Expand All @@ -316,8 +318,8 @@ async def get_user_agent_and_sec_ch_ua_async(page_method: Callable) -> Tuple[str
user_agent, sec_ch_ua
"""
temp_page: Optional[async_api.Page]
stealth_user_agent = getattr(browser_instance, USER_AGENT_OVERRIDE_PIGGYBACK_KEY)
sec_ch_ua = getattr(browser_instance, SEC_CH_UA_OVERRIDE_PIGGYBACK_KEY)
stealth_user_agent = getattr(browser_instance, USER_AGENT_OVERRIDE_PIGGYBACK_KEY, None)
sec_ch_ua = getattr(browser_instance, SEC_CH_UA_OVERRIDE_PIGGYBACK_KEY, None)
if stealth_user_agent is None or sec_ch_ua is None:
temp_page = await page_method()
stealth_user_agent = (await temp_page.evaluate("navigator.userAgent")).replace(
Expand Down Expand Up @@ -345,43 +347,43 @@ def get_user_agent_and_sec_ch_ua_sync(page_method: Callable) -> Tuple[str, str]:
async def hooked_browser_method_async(*args, **kwargs):
# respect any override the user passes themselves
if self.navigator_user_agent and kwargs.get("user_agent") is None:
user_agent_override = self.navigator_user_agent_override
if user_agent_override is None and browser_instance.browser_type == "chromium":
user_agent_override, _ = await get_user_agent_and_sec_ch_ua_async(new_page_method)
kwargs["user_agent"] = self.navigator_user_agent_override
resolved_user_agent_override = self.navigator_user_agent_override
if resolved_user_agent_override is None and is_chromium:
resolved_user_agent_override, _ = await get_user_agent_and_sec_ch_ua_async(new_page_method)
kwargs["user_agent"] = resolved_user_agent_override

extra_http_headers = kwargs.get("extra_http_headers", {})
# respect any override the user passes themselves
if self.sec_ch_ua and CaseInsensitiveDict(extra_http_headers).get("sec-ch-ua") is None:
sec_ch_ua_override = self.sec_ch_ua_override
if sec_ch_ua_override is None and browser_instance.browser_type == "chromium":
_, sec_ch_ua_override = await get_user_agent_and_sec_ch_ua_async(new_page_method)
if sec_ch_ua_override is not None:
resolved_sec_ch_ua_override = self.sec_ch_ua_override
if resolved_sec_ch_ua_override is None and is_chromium:
_, resolved_sec_ch_ua_override = await get_user_agent_and_sec_ch_ua_async(new_page_method)
if resolved_sec_ch_ua_override is not None:
# this could be tricky if a differently cased key of the same thing exists,
# but we have done a case-insensitive check above that precludes this
extra_http_headers["sec-ch-ua"] = sec_ch_ua_override
extra_http_headers["sec-ch-ua"] = resolved_sec_ch_ua_override
kwargs["extra_http_headers"] = extra_http_headers
page = await new_page_method(*args, **kwargs)
await self.apply_stealth_async(page)
return page

def hooked_browser_method_sync(*args, **kwargs):
if self.navigator_user_agent and kwargs.get("user_agent") is None:
user_agent_override = self.navigator_user_agent_override
if user_agent_override is None and browser_instance.browser_type == "chromium":
user_agent_override, _ = get_user_agent_and_sec_ch_ua_sync(new_page_method)
resolved_user_agent_override = self.navigator_user_agent_override
if resolved_user_agent_override is None and is_chromium:
resolved_user_agent_override, _ = get_user_agent_and_sec_ch_ua_sync(new_page_method)
kwargs["user_agent"] = self.navigator_user_agent_override

extra_http_headers = kwargs.get("extra_http_headers", {})
if self.sec_ch_ua and CaseInsensitiveDict(extra_http_headers).get("sec-ch-ua") is None:
sec_ch_ua_override = self.sec_ch_ua_override
resolved_sec_ch_ua_override = self.sec_ch_ua_override
# respect any override the user has already made
if sec_ch_ua_override is None and browser_instance.browser_type == "chromium":
_, sec_ch_ua_override = get_user_agent_and_sec_ch_ua_sync(new_page_method)
if sec_ch_ua_override is not None:
if resolved_sec_ch_ua_override is None and is_chromium:
_, resolved_sec_ch_ua_override = get_user_agent_and_sec_ch_ua_sync(new_page_method)
if resolved_sec_ch_ua_override is not None:
# this could be tricky if a differently cased key of the same thing exists,
# but we have done a case-insensitive check above that precludes this
extra_http_headers["sec-ch-ua"] = sec_ch_ua_override
extra_http_headers["sec-ch-ua"] = resolved_sec_ch_ua_override
kwargs["extra_http_headers"] = extra_http_headers
page = new_page_method(*args, **kwargs)
self.apply_stealth_sync(page)
Expand Down Expand Up @@ -428,7 +430,7 @@ def _get_greased_chrome_sec_ua_ch(user_agent: str):
greased_versions = [8, 99, 24]
greasy_chars = " ():-./;=?_"
greasy_brand = f"Not{random.choice(greasy_chars)}A{random.choice(greasy_chars)}Brand"
version = re.search(r"Chrome/([\d.]+)", user_agent, re.IGNORECASE)
version = re.search(r"Chrome/(\d+)[\d.]+", user_agent, re.IGNORECASE)
major_version = version.group(1)
brands = [
("Chromium", major_version),
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = playwright-stealth
version = 2.0.0rc1
version = 2.0.0rc3
author = AtuboDad
author_email = lcjasas@sina.com
maintainer = Mattwmaster58
Expand Down

0 comments on commit c62fe03

Please sign in to comment.