Skip to content

Commit

Permalink
- zillow proxy support
Browse files (browse the repository at this point in the history)
  • Loading branch information
ZacharyHampton committed Sep 29, 2023
1 parent 59317fd commit 3b7c17b
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 5 deletions.
10 changes: 8 additions & 2 deletions homeharvest/core/scrapers/__init__.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
import requests
import tls_client
from .models import Property, ListingType, SiteName


Expand All @@ -12,15 +13,20 @@ class ScraperInput:


class Scraper:
def __init__(self, scraper_input: ScraperInput):
def __init__(self, scraper_input: ScraperInput, session: requests.Session | tls_client.Session = None):
self.location = scraper_input.location
self.listing_type = scraper_input.listing_type

self.session = requests.Session()
if not session:
self.session = requests.Session()
else:
self.session = session

if scraper_input.proxy:
proxy_url = scraper_input.proxy
proxies = {"http": proxy_url, "https": proxy_url}
self.session.proxies.update(proxies)

self.listing_type = scraper_input.listing_type
self.site_name = scraper_input.site_name

Expand Down
5 changes: 3 additions & 2 deletions homeharvest/core/scrapers/zillow/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,11 +20,12 @@

class ZillowScraper(Scraper):
def __init__(self, scraper_input):
super().__init__(scraper_input)
self.session = tls_client.Session(
session = tls_client.Session(
client_identifier="chrome112", random_tls_extension_order=True
)

super().__init__(scraper_input, session)

self.session.headers.update({
'authority': 'www.zillow.com',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "homeharvest"
version = "0.2.17"
version = "0.2.18"
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
homepage = "https://github.com/ZacharyHampton/HomeHarvest"
Expand Down

0 comments on commit 3b7c17b

Please sign in to comment.