-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
262 lines (228 loc) · 9.38 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
import streamlit as st
from streamlit_folium import st_folium
import pandas as pd
import time
import os
import json
import plotly.express as px # Add this import
from utils import (
load_data,
batch_geocode,
identify_undervalued_properties,
create_property_map,
analyze_market, # Add this import
create_market_analysis_charts # Add this import
)
from helpers import format_price
# Configuration
# Page-level Streamlit settings, applied at module load before main() is
# defined or called further down in this file.
_PAGE_SETTINGS = {
    "page_title": "Funda Property Analysis",
    "page_icon": ":house:",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# Price thresholds (in euros) offered by the min/max selectors on the map tab.
_PRICE_STEPS = [
    50_000, 75_000, 100_000, 125_000, 150_000, 175_000, 200_000,
    225_000, 250_000, 275_000, 300_000, 325_000, 350_000, 375_000, 400_000,
    450_000, 500_000, 550_000, 600_000, 650_000, 700_000, 750_000,
    800_000, 900_000, 1_000_000, 1_250_000, 1_500_000, 2_000_000,
    2_500_000, 3_000_000, 3_500_000, 4_000_000, 4_500_000, 5_000_000
]

# Shown instead of an average when the current filter selects no rows.
_NO_DATA_LABEL = "N/A"


def _attach_coordinates(df, cache_path):
    """Add ``latitude``/``longitude`` columns to *df* in place if missing.

    Coordinates are read from the JSON file at *cache_path* when it exists,
    parses, and holds exactly one entry per row of *df*. Otherwise the
    addresses are geocoded with ``batch_geocode`` and the result is written
    back to *cache_path* for the next run.

    Args:
        df: Listings dataframe; must provide ``address`` and ``zip`` columns
            when geocoding is required.
        cache_path: Path of the JSON coordinate cache.
    """
    if 'latitude' in df.columns:
        return

    # Try to load from cache first
    try:
        with open(cache_path, 'r', encoding='utf-8') as f:
            cached = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        cached = None

    # A cache built for a different number of rows cannot be aligned with df,
    # so it is treated as stale rather than letting the column assignment fail.
    if isinstance(cached, list) and len(cached) == len(df):
        coordinates = cached
    else:
        with st.spinner("Adding geolocation data..."):
            # Create address tuples for geocoding
            addresses = list(zip(df['address'], df['zip']))
            # NOTE(review): assumes batch_geocode returns one (lat, lon) pair
            # per address, in input order - confirm against utils.
            coordinates = batch_geocode(addresses)
            # Save coordinates to cache
            with open(cache_path, 'w', encoding='utf-8') as f:
                json.dump(coordinates, f)

    df['latitude'] = [coord[0] for coord in coordinates]
    df['longitude'] = [coord[1] for coord in coordinates]


def _render_map_tab(df):
    """Render the "Map View" tab: filters, the property map and headline stats.

    Args:
        df: Listings dataframe with ``price_numeric`` and ``is_undervalued``
            columns (``price_per_sqm`` is used when present).
    """
    # Map filters section
    st.subheader("Map filter")
    col1, _ = st.columns(2)  # second column is intentionally left empty
    with col1:
        # Unchecked (default) restricts the map to undervalued properties
        show_all = st.checkbox(
            "Show all properties (takes time to load)",
            value=False,
            key="map_all_filter"
        )

        # The select boxes hold the numeric steps and only *display* them via
        # format_price, so the chosen value never has to be parsed back out
        # of a formatted label.
        price_col1, price_col2 = st.columns(2)
        with price_col1:
            min_price_value = st.selectbox(
                "min price",
                options=_PRICE_STEPS[:-1],
                index=0,
                format_func=format_price,
                key="min_price_select"
            )
        with price_col2:
            # Only offer maxima strictly above the chosen minimum; default to
            # the highest step.
            min_idx = _PRICE_STEPS.index(min_price_value)
            max_options = _PRICE_STEPS[min_idx + 1:]
            max_price_value = st.selectbox(
                "max price",
                options=max_options,
                index=len(max_options) - 1,
                format_func=format_price,
                key="max_price_select"
            )

    mask = (
        (df['price_numeric'] >= min_price_value) &
        (df['price_numeric'] <= max_price_value)
    )
    if not show_all:  # Show only undervalued by default
        mask &= df['is_undervalued']
    df_filtered = df[mask]

    # Map section with statistics
    map_col1, map_col2 = st.columns([3, 1])
    with map_col1:
        st.subheader("Map of properties for sale in Amsterdam")
        st.text("By default, showing undervalued properties.")
        st.text("Remember to activate openstreetmap in the settings and click on a marker to see more details!")
        property_map = create_property_map(df_filtered)
        st_folium(
            property_map,
            width="100%",
            height=600,
            returned_objects=["last_active_drawing"],
            use_container_width=True
        )
    with map_col2:
        st.subheader("High level stats")
        # The mean of an empty selection is NaN; how format_price treats NaN
        # is not visible from this file, so show a placeholder instead.
        has_rows = not df_filtered.empty
        st.metric(
            "Total properties",
            f"{len(df_filtered):,}"
        )
        st.metric(
            "Average price",
            format_price(df_filtered['price_numeric'].mean()) if has_rows else _NO_DATA_LABEL
        )
        st.metric(
            "Undervalued properties",
            df_filtered['is_undervalued'].sum()
        )
        if 'price_per_sqm' in df_filtered.columns:
            st.metric(
                "Average price/m²",
                format_price(df_filtered['price_per_sqm'].mean()) if has_rows else _NO_DATA_LABEL
            )
    st.markdown("*Note: undervalued properties calculated using a proprietary algorithm, contact us for additional detail.*")


def _render_market_tab(df):
    """Render the "Market Analysis" tab: summary metrics and unfiltered charts.

    Args:
        df: Listings dataframe with ``price_numeric``, ``living_area_numeric``,
            ``price_per_sqm``, ``house_type`` and ``zip`` columns.
    """
    st.subheader("Summary Statistics")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(
            "Total Listings",
            f"{len(df):,}"
        )
        st.metric(
            "Average Price",
            format_price(df['price_numeric'].mean())
        )
        st.metric(
            "Median Price",
            format_price(df['price_numeric'].median())
        )
        st.metric(
            "Total Market Value",
            format_price(df['price_numeric'].sum())
        )
    with col2:
        st.metric(
            "Average Living Area",
            f"{df['living_area_numeric'].mean():.1f} m²"
        )
        st.metric(
            "Median Living Area",
            f"{df['living_area_numeric'].median():.1f} m²"
        )
        st.metric(
            "Average Price/m²",
            format_price(df['price_per_sqm'].mean())
        )
        st.metric(
            "Total Living Area",
            f"{df['living_area_numeric'].sum():,.0f} m²"
        )
    with col3:
        st.metric(
            "Houses",
            f"{len(df[df['house_type'] == 'House']):,}"
        )
        st.metric(
            "Apartments",
            f"{len(df[df['house_type'] == 'Apartment']):,}"
        )
        st.metric(
            "Postal Codes",
            f"{df['zip'].nunique():,}"
        )

    st.text("")
    st.subheader("Market Insights")
    # Charts are built from the full dataframe, not the map-tab selection
    charts = create_market_analysis_charts(df)
    for chart_key in (
        'area_price',
        'price_by_zip',
        'price_dist',
        'price_dist_1m',
        'price_per_sqm',
        'property_type',
        'price_tree',
    ):
        st.plotly_chart(charts[chart_key], use_container_width=True)
    st.markdown(
        "*The tree map above shows the distribution of properties by price range. "
        "The size of each box represents the number of properties in that range.*"
    )
    st.plotly_chart(charts['area_tree'], use_container_width=True)
    st.markdown(
        "*The tree map above shows the distribution of properties by area range. "
        "The size of each box represents the number of properties in that range.*"
    )


def main():
    """Build the Streamlit page: load listings, enrich them, render both tabs.

    Loads ``funda_listings_with_coords.csv`` from the directory of this
    script, adds coordinates when the file lacks them, flags undervalued
    properties, and renders the "Map View" and "Market Analysis" tabs.
    Shows an error and returns early if the CSV cannot be found.
    """
    st.title("Amsterdam Property Analysis - December 2024")

    # File paths are resolved relative to this script, not the working dir
    current_dir = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(current_dir, "funda_listings_with_coords.csv")
    # Previously referenced but never defined, which raised NameError as soon
    # as the CSV lacked a 'latitude' column.
    coords_cache_path = os.path.join(current_dir, "coords_cache.json")

    # Load and process data
    try:
        df = load_data(file_path)
    except FileNotFoundError:
        st.error(f"Error: Could not find the file at {file_path}")
        return

    # Add coordinates if not already present
    _attach_coordinates(df, coords_cache_path)

    # Identify undervalued properties
    df = identify_undervalued_properties(df)

    # Create tabs for different sections
    tab1, tab2 = st.tabs(["Map View", "Market Analysis"])
    with tab1:
        _render_map_tab(df)
    with tab2:
        _render_market_tab(df)
if __name__ == "__main__":
    main()

# Footer: attribution line crediting the author and the data source.
_FOOTER_CREDIT = (
    'Made by [Valentin Mendez](https://www.linkedin.com/in/valentemendez/) using information from [Funda NL](https://www.funda.nl/)'
)
st.markdown(_FOOTER_CREDIT)

# Hide the "Made with Streamlit" footer by injecting a CSS rule; raw HTML is
# only rendered because unsafe_allow_html is enabled on this call.
hide_streamlit_style = """
<style>
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)