Skip to content

Commit b1d9c17

Browse files
authored
Merge pull request #1065 from cal-itp/corridor_study_sb125
Corridor study SB125
2 parents cd075a2 + e43b532 commit b1d9c17

File tree

3 files changed

+10379
-0
lines changed

3 files changed

+10379
-0
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""
2+
Replica and Streetlight Analysis Utils
3+
"""
4+
5+
import pandas as pd
6+
from siuba import *
7+
import ast
8+
9+
from calitp_data_analysis.sql import to_snakecase
10+
11+
import altair as alt
12+
from calitp_data_analysis import calitp_color_palette as cp
13+
14+
15+
"""
16+
Replica Analysis Utils
17+
"""
18+
## Summarizes Replica public-transit trips into DataFrames that are easier to analyze
19+
def get_tranist_agency_counts(df, primary_mode_col, transit_mode_col, transit_agency_col, activity_id_col):
    """
    Summarize the public-transit rows of a Replica trip table.

    Only rows whose ``primary_mode_col`` equals "public_transit" are used.

    Returns a tuple ``(agencies, modes)``:

    * ``agencies``: one row per (primary mode, transit mode, transit agency)
      combination, with ``n`` = number of distinct ``activity_id_col`` values,
      sorted by ``n`` descending. Adds ``agency_count`` (distinct lower-cased
      names after splitting the agency string on ", ") and ``n_modes_taken``
      (number of whitespace-separated tokens in the mode string).
    * ``modes``: row counts ``n`` per ``transit_mode_col`` value, sorted by
      ``n`` descending, plus the same ``n_modes_taken`` column.
    """

    def _n_distinct_names(joined_names):
        # agencies are listed as one ", "-separated string per row
        return len(set(joined_names.split(", ")))

    def _n_tokens(mode_text):
        # whitespace split, so each token counts as one mode
        return len(mode_text.split())

    # Both summaries start from the same public-transit subset.
    transit_trips = df >> filter(_[primary_mode_col] == "public_transit")

    ## agency-level counts
    agencies = (
        transit_trips
        >> group_by(_[primary_mode_col], _[transit_mode_col], _[transit_agency_col])
        >> summarize(n=_[activity_id_col].nunique())
        >> arrange(-_.n)
    )

    # Cast to str so the string helpers below also work on missing values.
    for text_col in (transit_mode_col, transit_agency_col):
        agencies[text_col] = agencies[text_col].astype(str)

    lowered_agencies = agencies[transit_agency_col].str.lower()
    agencies['agency_count'] = list(map(_n_distinct_names, lowered_agencies))
    agencies['n_modes_taken'] = agencies[transit_mode_col].apply(_n_tokens)

    ## mode-level counts
    modes = transit_trips >> count(_[transit_mode_col]) >> arrange(-_.n)

    modes[transit_mode_col] = modes[transit_mode_col].astype(str)
    modes['n_modes_taken'] = modes[transit_mode_col].apply(_n_tokens)

    return agencies, modes
45+
46+
def get_list_of_agencies(df, transit_agency_col):
    """
    Return the set of distinct, title-cased agency names found in a column.

    Each cell of ``df[transit_agency_col]`` is expected to hold one or more
    agency names separated by ", ". Every cell is title-cased and split on
    ", "; the union of all resulting names is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the agency column.
    transit_agency_col : str
        Name of the column holding the ", "-separated agency names.

    Returns
    -------
    set of str
        Unique title-cased agency names. Empty when the column has no
        non-missing values.
    """
    agency_names = set()

    # Split each cell directly instead of assembling a Python list literal
    # and parsing it: names containing quotes (e.g. "O'Hare") are then safe,
    # and a one-row frame no longer collapses to a bare string.
    # Missing cells carry no agency names, so they are skipped.
    for cell in df[transit_agency_col].dropna().astype(str):
        agency_names.update(cell.title().split(", "))

    return agency_names
64+
65+
def get_dummies_by_agency(df, col):
    """
    Add one count column per agency plus a ``unique_agencies`` column.

    ``df[col]`` must hold strings listing agency names separated by ", ".
    For every distinct agency name found in the column, a new column with
    that name is added holding how many times the name appears in each
    row's list. ``unique_agencies`` holds each row's distinct names, sorted
    and re-joined with ", ".

    Parameters
    ----------
    df : pandas.DataFrame
        Table to annotate. It is modified in place.
    col : str
        Name of the column holding the ", "-separated agency names.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the added columns.
    """
    # Split once up front; every later step works on these per-row lists.
    agencies_per_row = df[col].str.split(', ')

    transit_agencies = set()
    for agencies in agencies_per_row:
        transit_agencies.update(agencies)

    # Sorted so the new columns are added in a reproducible order.
    for agency in sorted(transit_agencies):
        # Count exact names in the row's list. A pattern-based count would
        # treat the name as a regular expression and also match substrings
        # (e.g. "Metro" inside "Metrolink").
        df[agency] = [agencies.count(agency) for agencies in agencies_per_row]

    ### adding column for unique agencies list
    df['unique_agencies'] = [
        ', '.join(sorted(set(agencies))) for agencies in agencies_per_row
    ]

    return df
86+
87+
def get_agencies_occurances(df, non_agency_cols=None):
    """
    Total each per-agency column into an ``agency`` / ``n_trips`` table.

    Every column of ``df`` whose name is not listed in ``non_agency_cols``
    is treated as a per-agency count column and summed over all rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing per-agency count columns alongside other columns.
    non_agency_cols : iterable of str, optional
        Column names to exclude from the totals. Defaults to
        'transit_agency', 'unique_agencies', 'primary_mode',
        'transit_submode', 'n', 'agency_count' and 'n_modes_taken'.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``agency`` (the column name) and ``n_trips`` (the
        column total), one row per agency column in ``df`` column order.
    """
    if non_agency_cols is None:
        non_agency_cols = {
            'transit_agency',
            'unique_agencies',
            'primary_mode',
            'transit_submode',
            'n',
            'agency_count',
            'n_modes_taken',
        }
    else:
        non_agency_cols = set(non_agency_cols)

    ## keep only the columns that are agency names
    agency_cols = [name for name in df.columns if name not in non_agency_cols]

    ## sum up the counts per agency
    # Summing down each column gives the same totals as transposing and
    # summing across rows, without pulling the string 'agency' label column
    # into the sum (which raises TypeError on recent pandas).
    totals = df.loc[:, agency_cols].sum(axis=0)

    return totals.rename_axis('agency').reset_index(name='n_trips')
108+
109+
"""
110+
Streetlight Analysis Utils
111+
"""
112+
113+

0 commit comments

Comments
 (0)