Commit 8e04f6b (parent: 1f717bd)
Showing 11 changed files with 274 additions and 241 deletions.
Two files were deleted in this commit; their contents are not shown.
@@ -0,0 +1,104 @@
"""
This script scrapes sold and pending-sale land listings from the past year for a list of zip codes
and saves the data to individual Excel files. It adds two columns to the data, 'lot_acres' and
'ppa' (price per acre), so the user can analyze the average price of land in a zip code.
"""

import os
import pandas as pd
from homeharvest import scrape_property


def get_property_details(zip: str, listing_type):
    properties = scrape_property(location=zip, listing_type=listing_type, property_type=["land"], past_days=365)
    if not properties.empty:
        # Convert lot size from square feet to acres (43,560 sq ft per acre)
        properties["lot_acres"] = properties["lot_sqft"].apply(lambda x: x / 43560 if pd.notnull(x) else None)

        # Keep land-only listings (no building square footage reported)
        properties = properties[properties["sqft"].isnull()]

        # Price per acre: use the sold price for SOLD listings, otherwise the list price
        properties["ppa"] = properties.apply(
            lambda row: (
                int(
                    (
                        row["sold_price"]
                        if (pd.notnull(row["sold_price"]) and row["status"] == "SOLD")
                        else row["list_price"]
                    )
                    / row["lot_acres"]
                )
                if pd.notnull(row["lot_acres"])
                and row["lot_acres"] > 0
                and (pd.notnull(row["sold_price"]) or pd.notnull(row["list_price"]))
                else None
            ),
            axis=1,
        )
        properties["ppa"] = properties["ppa"].astype("Int64")
        selected_columns = [
            "property_url",
            "property_id",
            "style",
            "status",
            "street",
            "city",
            "state",
            "zip_code",
            "county",
            "list_date",
            "last_sold_date",
            "list_price",
            "sold_price",
            "lot_sqft",
            "lot_acres",
            "ppa",
        ]
        properties = properties[selected_columns]
    return properties
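

# Illustrative usage: preview a single zip code from the list below before running the full batch.
#   df = get_property_details("22920", "sold")
#   print(df[["status", "list_price", "sold_price", "lot_acres", "ppa"]].head())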


def output_to_excel(zip_code, sold_df, pending_df):
    root_folder = os.getcwd()
    zip_folder = os.path.join(root_folder, "zips", zip_code)

    # Create zip code folder if it doesn't exist
    os.makedirs(zip_folder, exist_ok=True)

    # Define file paths
    sold_file = os.path.join(zip_folder, f"{zip_code}_sold.xlsx")
    pending_file = os.path.join(zip_folder, f"{zip_code}_pending.xlsx")

    # Save individual sold and pending files
    sold_df.to_excel(sold_file, index=False)
    pending_df.to_excel(pending_file, index=False)


zip_codes = map(
    str,
    [
        22920,
        77024,
        78028,
        24553,
        22967,
        22971,
        22922,
        22958,
        22969,
        22949,
        22938,
        24599,
        24562,
        22976,
        24464,
        22964,
        24581,
    ],
)

combined_df = pd.DataFrame()
for zip in zip_codes:
    sold_df = get_property_details(zip, "sold")
    pending_df = get_property_details(zip, "pending")
    combined_df = pd.concat([combined_df, sold_df, pending_df], ignore_index=True)
    output_to_excel(zip, sold_df, pending_df)

combined_file = os.path.join(os.getcwd(), "zips", "combined.xlsx")
combined_df.to_excel(combined_file, index=False)
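
Once the script has written zips/combined.xlsx, the per-zip analysis the docstring describes can be done with a few lines of pandas. The following is a minimal sketch, not part of the commit; it assumes the file path and the zip_code, status, and ppa columns written by the script above, and that an Excel engine such as openpyxl is available to pd.read_excel.

import os

import pandas as pd

# Load the combined output written by the scraper script
combined = pd.read_excel(os.path.join("zips", "combined.xlsx"))

# Average and median price per acre for each zip code and listing status,
# skipping rows where ppa could not be computed
summary = (
    combined.dropna(subset=["ppa"])
    .groupby(["zip_code", "status"])["ppa"]
    .agg(["count", "mean", "median"])
    .rename(columns={"count": "listings", "mean": "avg_ppa", "median": "median_ppa"})
)

print(summary.sort_values("avg_ppa", ascending=False))

Grouping by status as well as zip code keeps sold and pending listings separate, since the ppa for pending lots is derived from list prices rather than final sale prices.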