Skip to content

Commit

Permalink
enh: add agent
Browse files Browse the repository at this point in the history
  • Loading branch information
cullenwatson committed Apr 16, 2024
1 parent 1f47fc3 commit 2a5ac3e
Show file tree
Hide file tree
Showing 13 changed files with 419 additions and 158 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/publish-to-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ jobs:
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.PYPI_API_TOKEN }}
password: ${{ secrets.PYPI_API_TOKEN }}
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
**/.pytest_cache/
*.pyc
/.ipynb_checkpoints/
*.csv
*.csv
21 changes: 21 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-added-large-files
- id: check-yaml
- repo: https://github.com/adrienverge/yamllint
rev: v1.29.0
hooks:
- id: yamllint
verbose: true # create awareness of linter findings
args: ["-d", "{extends: relaxed, rules: {line-length: {max: 120}}}"]
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black
language_version: python
args: [--line-length=120, --quiet]
13 changes: 6 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
```bash
pip install -U homeharvest
```
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_

## Usage

Expand All @@ -39,11 +39,11 @@ properties = scrape_property(
location="San Diego, CA",
listing_type="sold", # or (for_sale, for_rent, pending)
past_days=30, # sold in last 30 days - listed in last 30 days if (for_sale, for_rent)
# date_from="2023-05-01", # alternative to past_days
# date_to="2023-05-28",

# date_from="2023-05-01", # alternative to past_days
# date_to="2023-05-28",
# foreclosure=True

# mls_only=True, # only fetch MLS listings
)
print(f"Number of properties: {len(properties)}")
Expand Down Expand Up @@ -84,7 +84,7 @@ Optional
├── date_from, date_to (string): Start and end dates to filter properties listed or sold, both dates are required.
│ (use this to get properties in chunks as there's a 10k result limit)
│ Format for both must be "YYYY-MM-DD".
│ Format for both must be "YYYY-MM-DD".
│ Example: "2023-05-01", "2023-05-15" (fetches properties listed/sold between these dates)
├── mls_only (True/False): If set, fetches only MLS listings (mainly applicable to 'sold' listings)
Expand Down Expand Up @@ -143,4 +143,3 @@ The following exceptions may be raised when using HomeHarvest:

- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD

8 changes: 2 additions & 6 deletions homeharvest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@

def main():
parser = argparse.ArgumentParser(description="Home Harvest Property Scraper")
parser.add_argument(
"location", type=str, help="Location to scrape (e.g., San Francisco, CA)"
)
parser.add_argument("location", type=str, help="Location to scrape (e.g., San Francisco, CA)")

parser.add_argument(
"-l",
Expand Down Expand Up @@ -35,9 +33,7 @@ def main():
help="Name of the output file (without extension)",
)

parser.add_argument(
"-p", "--proxy", type=str, default=None, help="Proxy to use for scraping"
)
parser.add_argument("-p", "--proxy", type=str, default=None, help="Proxy to use for scraping")
parser.add_argument(
"-d",
"--days",
Expand Down
33 changes: 27 additions & 6 deletions homeharvest/core/scrapers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
import requests
import uuid
from .models import Property, ListingType, SiteName


Expand Down Expand Up @@ -27,6 +28,12 @@ def __init__(

if not session:
self.session = requests.Session()
self.session.headers.update(
{
"auth": f"Bearer {self.get_access_token()}",
"apollographql-client-name": "com.move.Realtor-apollo-ios",
}
)
else:
self.session = session

Expand All @@ -43,12 +50,26 @@ def __init__(
self.date_to = scraper_input.date_to
self.foreclosure = scraper_input.foreclosure

def search(self) -> list[Property]:
...
def search(self) -> list[Property]: ...

@staticmethod
def _parse_home(home) -> Property:
...
def _parse_home(home) -> Property: ...

def handle_location(self):
...
def handle_location(self): ...

def get_access_token(self):
    """Request an access token from the realtor.com GraphQL auth endpoint.

    A fresh, upper-cased UUID4 is sent as the device id on every call, and
    the request identifies itself with the iOS client's version string.

    Returns:
        The value of the ``access_token`` field of the JSON response.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.Timeout: if the endpoint does not answer in time.
        KeyError: if the JSON response has no ``access_token`` field.
    """
    import json  # local import keeps this block self-contained

    url = "https://graph.realtor.com/auth/token"
    client_version = "24.20.4.149916"

    # Serialise with json.dumps instead of hand-building the string; compact
    # separators reproduce the exact wire format the original sent.
    payload = json.dumps(
        {
            "client_app_id": f"rdc_mobile_native,{client_version},iphone",
            "device_id": str(uuid.uuid4()).upper(),
            "grant_type": "device_mobile",
        },
        separators=(",", ":"),
    )
    headers = {
        "Host": "graph.realtor.com",
        "x-client-version": client_version,
        "accept": "*/*",
        "content-type": "Application/json",
        "user-agent": f"Realtor.com/{client_version} CFNetwork/1410.0.3 Darwin/22.6.0",
        "accept-language": "en-US,en;q=0.9",
    }

    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.post(url, headers=headers, data=payload, timeout=30)
    # Fail with the HTTP error itself rather than a confusing JSON/KeyError.
    response.raise_for_status()

    data = response.json()
    return data["access_token"]
8 changes: 8 additions & 0 deletions homeharvest/core/scrapers/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ class Description:
stories: int | None = None


@dataclass
class Agent:
name: str | None = None
phone: str | None = None


@dataclass
class Property:
property_url: str
Expand All @@ -89,3 +95,5 @@ class Property:
latitude: float | None = None
longitude: float | None = None
neighborhoods: Optional[str] = None

agents: list[Agent] = None
Loading

0 comments on commit 2a5ac3e

Please sign in to comment.