diff --git a/doc/filtering_observed_arrivals.ipynb b/doc/filtering_observed_arrivals.ipynb
index c265acd..f14f93e 100644
--- a/doc/filtering_observed_arrivals.ipynb
+++ b/doc/filtering_observed_arrivals.ipynb
@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -29,7 +29,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -393,13 +393,15 @@
"|2 | 62 |\n",
"| 4 | 6 |\n",
"\n",
- "So the "
+ "So the `train_id` isn't quite unique per trip, but something weird is happening to have more train ids than actual trains..."
]
},
{
"cell_type": "code",
"execution_count": 1,
- "metadata": {},
+ "metadata": {
+ "collapsed": true
+ },
"outputs": [
{
"ename": "NameError",
@@ -423,20 +425,6 @@
"one_train = pandasql.read_sql(sql, con)"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": 5,
@@ -6803,6 +6791,13 @@
" & ((train_136['train_message'] != 'Delayed') | (train_136['timint'] < 1.0 ))]"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Using Minute-Resolution Data"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -7616,7 +7611,1486 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "The top \"trips\" are from Bloor-Spadina to Yonge via St. George and vice-versa, but these are the stations on line 2..."
+ "The top \"trips\" are from Bloor-Spadina to Yonge via St. George and vice-versa, but these are the stations on line 2... I think we are getting data for the wrong line because the line id is actually assigned by the data-pulling script. So we need to filter the data by line based on the `subwayline` field in the `ntas_data` table."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Re-examining Line 1 by Filtering on Stations\n",
+ "Redoing the entire processing, this time using the `subwayline` field from the `ntas_data` table instead."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Recreate a day of data\n",
+ "sql = '''DROP MATERIALIZED VIEW IF EXISTS test_day CASCADE; \n",
+ "CREATE MATERIALIZED VIEW test_day AS \n",
+ "SELECT requestid, stationid, line_id, create_date, request_date, station_char, subwayline, system_message_type, \n",
+ " timint, traindirection, trainid, train_message\n",
+ "FROM requests_serverless\n",
+ "INNER JOIN ntas_data_serverless USING (requestid)\n",
+ "INNER JOIN stations ON stationid = station_id\n",
+ "WHERE request_date >= '2019-07-17'::DATE + interval '5 hours' \n",
+ "AND request_date < '2019-07-17'::DATE + interval '29 hours' \n",
+ "''' \n",
+ "with con:\n",
+ " with con.cursor() as cur:\n",
+ " cur.execute(sql)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Re-checking the number of unique trains"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " subwayline | \n",
+ " Number of trains in a day | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " BD | \n",
+ " 48 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " SHEP | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " YUS | \n",
+ " 169 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " subwayline Number of trains in a day\n",
+ "0 BD 48\n",
+ "1 SHEP 4\n",
+ "2 YUS 169"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql = ''' SELECT subwayline, COUNT(DISTINCT trainid) AS \"Number of trains in a day\"\n",
+ " FROM test_day\n",
+ " GROUP BY subwayline'''\n",
+ "pandasql.read_sql(sql, con)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Double-checking stations and lines are correctly mapped"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " subwayline | \n",
+ " station_char | \n",
+ " stationid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " BD | \n",
+ " BAT1 | \n",
+ " 46 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " BD | \n",
+ " BAT2 | \n",
+ " 46 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " BD | \n",
+ " BAU1 | \n",
+ " 49 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " BD | \n",
+ " BAU2 | \n",
+ " 49 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " YUS | \n",
+ " BLO1 | \n",
+ " 50 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " YUS | \n",
+ " BLO1 | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " YUS | \n",
+ " BLO2 | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " YUS | \n",
+ " BLO2 | \n",
+ " 50 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " BD | \n",
+ " BRD1 | \n",
+ " 53 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " BD | \n",
+ " BRD2 | \n",
+ " 53 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " BD | \n",
+ " BSP1 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " BD | \n",
+ " BSP1 | \n",
+ " 47 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " BD | \n",
+ " BSP2 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " BD | \n",
+ " BSP2 | \n",
+ " 47 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " SHEP | \n",
+ " BSS1 | \n",
+ " 66 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " SHEP | \n",
+ " BSS2 | \n",
+ " 66 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " SHEP | \n",
+ " BYV1 | \n",
+ " 65 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " SHEP | \n",
+ " BYV2 | \n",
+ " 65 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " BD | \n",
+ " CFK1 | \n",
+ " 52 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " BD | \n",
+ " CFK2 | \n",
+ " 52 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " BD | \n",
+ " CHE1 | \n",
+ " 54 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " BD | \n",
+ " CHE2 | \n",
+ " 54 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " BD | \n",
+ " CHR1 | \n",
+ " 45 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " BD | \n",
+ " CHR2 | \n",
+ " 45 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " YUS | \n",
+ " COL1 | \n",
+ " 20 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " YUS | \n",
+ " COL2 | \n",
+ " 20 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " BD | \n",
+ " COX1 | \n",
+ " 58 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " BD | \n",
+ " COX2 | \n",
+ " 58 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " YUS | \n",
+ " CVL1 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " YUS | \n",
+ " CVL2 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 134 | \n",
+ " YUS | \n",
+ " SUM1 | \n",
+ " 24 | \n",
+ "
\n",
+ " \n",
+ " 135 | \n",
+ " YUS | \n",
+ " SUM2 | \n",
+ " 24 | \n",
+ "
\n",
+ " \n",
+ " 136 | \n",
+ " YUS | \n",
+ " UNI1 | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " 137 | \n",
+ " YUS | \n",
+ " UNI2 | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " 138 | \n",
+ " YUS | \n",
+ " VMC1 | \n",
+ " 80 | \n",
+ "
\n",
+ " \n",
+ " 139 | \n",
+ " YUS | \n",
+ " VMC2 | \n",
+ " 80 | \n",
+ "
\n",
+ " \n",
+ " 140 | \n",
+ " BD | \n",
+ " VPK1 | \n",
+ " 61 | \n",
+ "
\n",
+ " \n",
+ " 141 | \n",
+ " BD | \n",
+ " VPK2 | \n",
+ " 61 | \n",
+ "
\n",
+ " \n",
+ " 142 | \n",
+ " BD | \n",
+ " WAR1 | \n",
+ " 62 | \n",
+ "
\n",
+ " \n",
+ " 143 | \n",
+ " BD | \n",
+ " WAR2 | \n",
+ " 62 | \n",
+ "
\n",
+ " \n",
+ " 144 | \n",
+ " BD | \n",
+ " WDB1 | \n",
+ " 59 | \n",
+ "
\n",
+ " \n",
+ " 145 | \n",
+ " BD | \n",
+ " WDB2 | \n",
+ " 59 | \n",
+ "
\n",
+ " \n",
+ " 146 | \n",
+ " YUS | \n",
+ " WEL1 | \n",
+ " 21 | \n",
+ "
\n",
+ " \n",
+ " 147 | \n",
+ " YUS | \n",
+ " WEL2 | \n",
+ " 21 | \n",
+ "
\n",
+ " \n",
+ " 148 | \n",
+ " YUS | \n",
+ " WIL1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 149 | \n",
+ " YUS | \n",
+ " WIL2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 150 | \n",
+ " SHEP | \n",
+ " YIE1 | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 151 | \n",
+ " SHEP | \n",
+ " YIE1 | \n",
+ " 64 | \n",
+ "
\n",
+ " \n",
+ " 152 | \n",
+ " SHEP | \n",
+ " YIE2 | \n",
+ " 64 | \n",
+ "
\n",
+ " \n",
+ " 153 | \n",
+ " SHEP | \n",
+ " YIE2 | \n",
+ " 30 | \n",
+ "
\n",
+ " \n",
+ " 154 | \n",
+ " YUS | \n",
+ " YKD1 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 155 | \n",
+ " YUS | \n",
+ " YKD2 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 156 | \n",
+ " YUS | \n",
+ " YKM1 | \n",
+ " 29 | \n",
+ "
\n",
+ " \n",
+ " 157 | \n",
+ " YUS | \n",
+ " YKM2 | \n",
+ " 29 | \n",
+ "
\n",
+ " \n",
+ " 158 | \n",
+ " BD | \n",
+ " YNG1 | \n",
+ " 50 | \n",
+ "
\n",
+ " \n",
+ " 159 | \n",
+ " BD | \n",
+ " YNG1 | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " 160 | \n",
+ " BD | \n",
+ " YNG2 | \n",
+ " 50 | \n",
+ "
\n",
+ " \n",
+ " 161 | \n",
+ " BD | \n",
+ " YNG2 | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " 162 | \n",
+ " YUS | \n",
+ " YUN1 | \n",
+ " 77 | \n",
+ "
\n",
+ " \n",
+ " 163 | \n",
+ " YUS | \n",
+ " YUN2 | \n",
+ " 77 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
164 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " subwayline station_char stationid\n",
+ "0 BD BAT1 46\n",
+ "1 BD BAT2 46\n",
+ "2 BD BAU1 49\n",
+ "3 BD BAU2 49\n",
+ "4 YUS BLO1 50\n",
+ "5 YUS BLO1 22\n",
+ "6 YUS BLO2 22\n",
+ "7 YUS BLO2 50\n",
+ "8 BD BRD1 53\n",
+ "9 BD BRD2 53\n",
+ "10 BD BSP1 9\n",
+ "11 BD BSP1 47\n",
+ "12 BD BSP2 9\n",
+ "13 BD BSP2 47\n",
+ "14 SHEP BSS1 66\n",
+ "15 SHEP BSS2 66\n",
+ "16 SHEP BYV1 65\n",
+ "17 SHEP BYV2 65\n",
+ "18 BD CFK1 52\n",
+ "19 BD CFK2 52\n",
+ "20 BD CHE1 54\n",
+ "21 BD CHE2 54\n",
+ "22 BD CHR1 45\n",
+ "23 BD CHR2 45\n",
+ "24 YUS COL1 20\n",
+ "25 YUS COL2 20\n",
+ "26 BD COX1 58\n",
+ "27 BD COX2 58\n",
+ "28 YUS CVL1 6\n",
+ "29 YUS CVL2 6\n",
+ ".. ... ... ...\n",
+ "134 YUS SUM1 24\n",
+ "135 YUS SUM2 24\n",
+ "136 YUS UNI1 16\n",
+ "137 YUS UNI2 16\n",
+ "138 YUS VMC1 80\n",
+ "139 YUS VMC2 80\n",
+ "140 BD VPK1 61\n",
+ "141 BD VPK2 61\n",
+ "142 BD WAR1 62\n",
+ "143 BD WAR2 62\n",
+ "144 BD WDB1 59\n",
+ "145 BD WDB2 59\n",
+ "146 YUS WEL1 21\n",
+ "147 YUS WEL2 21\n",
+ "148 YUS WIL1 2\n",
+ "149 YUS WIL2 2\n",
+ "150 SHEP YIE1 30\n",
+ "151 SHEP YIE1 64\n",
+ "152 SHEP YIE2 64\n",
+ "153 SHEP YIE2 30\n",
+ "154 YUS YKD1 3\n",
+ "155 YUS YKD2 3\n",
+ "156 YUS YKM1 29\n",
+ "157 YUS YKM2 29\n",
+ "158 BD YNG1 50\n",
+ "159 BD YNG1 22\n",
+ "160 BD YNG2 50\n",
+ "161 BD YNG2 22\n",
+ "162 YUS YUN1 77\n",
+ "163 YUS YUN2 77\n",
+ "\n",
+ "[164 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql = '''SELECT DISTINCT subwayline, station_char, stationid\n",
+ "FROM test_day\n",
+ "ORDER BY station_char'''\n",
+ "pandasql.read_sql(sql, con)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Seems ok. Rerunning the other processing queries based on that"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "split_trips = '''DROP MATERIALIZED VIEW IF EXISTS test_day_w_trips CASCADE; -- CASCADE also drops test_day_final, which is built from this view\n",
+ " CREATE SEQUENCE IF NOT EXISTS trip_ids;\n",
+ " CREATE MATERIALIZED VIEW test_day_w_trips AS\n",
+ " SELECT trainid, subwayline, traindirection, stationid, station_char, create_date, create_date + timint * interval '1 minute' AS expected_arrival, timint, train_message,\n",
+ " CASE traindirection WHEN lag(traindirection) OVER w THEN currval('trip_ids') ELSE nextval('trip_ids') END AS trip_id\n",
+ " FROM test_day\n",
+ " WHERE (timint < 1 OR train_message = 'AtStation') \n",
+ " WINDOW w AS (PARTITION BY subwayline, trainid ORDER BY create_date + timint * interval '1 minute') \n",
+ " '''\n",
+ "with con:\n",
+ " with con.cursor() as cur:\n",
+ " cur.execute(split_trips)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The final step is to group together multiple observations at the same station during the same trip, to get an approximation of arrival and \"departure\" times."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "final_step = ''' DROP MATERIALIZED VIEW IF EXISTS test_day_final;\n",
+ "CREATE MATERIALIZED VIEW test_day_final AS \n",
+ "SELECT trainid, subwayline, traindirection, LEFT(station_char, -1) AS station, trip_id,\n",
+ " MIN(expected_arrival) AS estimated_arrival, MAX(expected_arrival) AS estimated_departure,\n",
+ " CASE (ARRAY_AGG(train_message ORDER BY expected_arrival))[1] WHEN 'AtStation' THEN 1 ELSE 0 END AS exact_arr, \n",
+ "CASE (ARRAY_AGG(train_message ORDER BY expected_arrival DESC))[1] WHEN 'AtStation' THEN 1 ELSE 0 END AS exact_dep\n",
+ " FROM test_day_w_trips \n",
+ " GROUP BY trainid, subwayline, traindirection, station, trip_id \n",
+ "'''\n",
+ "with con:\n",
+ " with con.cursor() as cur:\n",
+ " cur.execute(final_step)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " lineid | \n",
+ " Number of observed trips | \n",
+ " Number of scheduled trips | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1116 | \n",
+ " 747 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 741 | \n",
+ " 704 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 558 | \n",
+ " 457 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " lineid Number of observed trips Number of scheduled trips\n",
+ "0 1 1116 747\n",
+ "1 2 741 704\n",
+ "2 4 558 457"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql = ''' WITH observed_trips AS \n",
+ " (SELECT CASE subwayline\n",
+ " WHEN 'YUS' THEN '1'\n",
+ " WHEN 'BD' THEN '2'\n",
+ " WHEN 'SHEP' THEN '4'\n",
+ " END as lineid, \n",
+ " COUNT(DISTINCT trip_id) AS \"Number of observed trips\"\n",
+ " FROM test_day_final\n",
+ " GROUP BY lineid)\n",
+ " , unique_trips AS(SELECT route_short_name AS lineid, COUNT(DISTINCT trip_id) AS \"Number of scheduled trips\"\n",
+ " FROM gtfs.routes -- ON lineid::TEXT = route_short_name\n",
+ " INNER JOIN gtfs.trips USING (route_id)\n",
+ " INNER JOIN gtfs.calendar USING (service_id)\n",
+ " WHERE monday AND route_type = 1 AND route_short_name != '3'\n",
+ " GROUP BY route_short_name)\n",
+ " \n",
+ " SELECT *\n",
+ " FROM observed_trips\n",
+ " INNER JOIN unique_trips USING (lineid)\n",
+ " ORDER BY lineid'''\n",
+ "pandasql.read_sql(sql, con)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sql = '''WITH inferred_trips AS(SELECT CASE subwayline\n",
+ " WHEN 'YUS' THEN 1\n",
+ " WHEN 'BD' THEN 2\n",
+ " WHEN 'SHEP' THEN 4\n",
+ " END as lineid, trip_id, COUNT(1) as stops\n",
+ "FROM test_day_final\n",
+ "GROUP BY lineid, trip_id\n",
+ "),\n",
+ "inferred_trip_length AS( SELECT lineid, stops, COUNT(trip_id) as obs_trips\n",
+ "FROM inferred_trips\n",
+ "GROUP BY lineid, stops)\n",
+ ",\n",
+ "gtfs_trip_lengths AS(SELECT route_short_name::INT AS lineid, trip_id, COUNT(1) as stops\n",
+ " FROM gtfs.stop_times \n",
+ " INNER JOIN gtfs.trips USING (trip_id)\n",
+ " INNER JOIN gtfs.routes USING (route_id)\n",
+ " INNER JOIN gtfs.calendar USING (service_id)\n",
+ " WHERE monday AND route_type = 1 AND route_short_name != '3'\n",
+ " GROUP BY route_short_name, trip_id\n",
+ ")\n",
+ ",gtfs_trip_length_distro AS (SELECT lineid, stops, COUNT(trip_id) as num_trips\n",
+ "FROM gtfs_trip_lengths\n",
+ "GROUP BY lineid, stops)\n",
+ "\n",
+ "SELECT lineid, stops, COALESCE(num_trips,0) as scheduled, COUNT(inferred_trips.trip_id) as observed \n",
+ "FROM inferred_trips\n",
+ "FULL OUTER JOIN gtfs_trip_length_distro USING (lineid, stops)\n",
+ "GROUP BY lineid, stops, num_trips\n",
+ "ORDER BY lineid, stops\n",
+ "'''\n",
+ "trip_lengths = pandasql.read_sql(sql, con)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "line_one = trip_lengths[trip_lengths['lineid'] == 1]  # Line 1 rows only\n",
+ "fig, ax = plt.subplots(figsize=(16,9))\n",
+ "line_one.plot(x='stops', y='scheduled', kind='bar', ax=ax,position=0, color='red')  # scheduled (GTFS) trips\n",
+ "line_one.plot(x='stops', y='observed', sharey=True, sharex=True, kind='bar', ax=ax, position=1, color='blue')  # observed trips\n",
+ "ax.set_title('Line 1 Distribution of Trip Lengths')\n",
+ "ax.set_ylabel('Number of trips')  # yaxis.set_label() only sets the artist label and leaves the axis unlabelled"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "line_one = trip_lengths[trip_lengths['lineid'] == 2]  # Line 2 rows only (variable name left unchanged)\n",
+ "fig, ax = plt.subplots(figsize=(16,9))\n",
+ "line_one.plot(x='stops', y='scheduled', kind='bar', ax=ax,position=0, color='red')  # scheduled (GTFS) trips\n",
+ "line_one.plot(x='stops', y='observed', sharey=True, sharex=True, kind='bar', ax=ax, position=1, color='blue')  # observed trips\n",
+ "ax.set_title('Line 2 Distribution of Trip Lengths')\n",
+ "ax.set_ylabel('Number of trips')  # yaxis.set_label() only sets the artist label and leaves the axis unlabelled"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "line_one = trip_lengths[trip_lengths['lineid'] == 4]  # Line 4 rows only (variable name left unchanged)\n",
+ "fig, ax = plt.subplots(figsize=(16,9))\n",
+ "line_one.plot(x='stops', y='scheduled', kind='bar', ax=ax,position=0, color='red')  # scheduled (GTFS) trips\n",
+ "line_one.plot(x='stops', y='observed', sharey=True, sharex=True, kind='bar', ax=ax, position=1, color='blue')  # observed trips\n",
+ "ax.set_title('Line 4 Distribution of Trip Lengths')\n",
+ "ax.set_ylabel('Number of trips')  # yaxis.set_label() only sets the artist label and leaves the axis unlabelled"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "I've managed to take out a solid chunk of trips that are too short, at the expense of generating a number of trips with more stops than are on that line...\n",
+ "What does one of those look like?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trainid | \n",
+ " subwayline | \n",
+ " traindirection | \n",
+ " station_char | \n",
+ " trip_id | \n",
+ " estimated_arrival | \n",
+ " estimated_departure | \n",
+ " exact_arr | \n",
+ " exact_dep | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " VMC1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 18:51:04.000000 | \n",
+ " 2019-07-17 18:52:04.000000 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " HWY1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 18:54:05.000000 | \n",
+ " 2019-07-17 18:54:05.000000 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " PVL1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 18:56:07.000000 | \n",
+ " 2019-07-17 18:56:07.000000 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " YUN1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 18:58:01.000000 | \n",
+ " 2019-07-17 18:58:01.000000 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " FIW1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:00:03.000000 | \n",
+ " 2019-07-17 19:00:03.000000 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " DNP1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:03:02.000000 | \n",
+ " 2019-07-17 19:03:02.000000 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " SHW1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:04:59.585143 | \n",
+ " 2019-07-17 19:04:59.585143 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " WIL1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:07:58.497143 | \n",
+ " 2019-07-17 19:07:58.497143 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " YKD1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:09:47.000000 | \n",
+ " 2019-07-17 19:09:47.000000 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " LWW1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:12:32.515429 | \n",
+ " 2019-07-17 19:12:47.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " GCN1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:14:41.849143 | \n",
+ " 2019-07-17 19:14:41.849143 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " CVL1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:16:20.573714 | \n",
+ " 2019-07-17 19:16:20.573714 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " SCW1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:19:50.000000 | \n",
+ " 2019-07-17 19:19:50.000000 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " DUP1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:22:28.628571 | \n",
+ " 2019-07-17 19:22:28.628571 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " SPA1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:23:24.290286 | \n",
+ " 2019-07-17 19:23:48.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " SGU1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:25:05.976000 | \n",
+ " 2019-07-17 19:25:17.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " MUS1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:26:49.352000 | \n",
+ " 2019-07-17 19:26:59.144000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " QPK1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:28:52.000000 | \n",
+ " 2019-07-17 19:28:52.000000 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " STP1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:30:18.006857 | \n",
+ " 2019-07-17 19:30:18.006857 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " OSG1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:31:15.858286 | \n",
+ " 2019-07-17 19:31:15.858286 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " STA1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:32:08.936686 | \n",
+ " 2019-07-17 19:32:08.936686 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " UNI1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:33:33.233600 | \n",
+ " 2019-07-17 19:33:52.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " KNG1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:34:40.618994 | \n",
+ " 2019-07-17 19:35:53.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " QUN1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:36:45.517463 | \n",
+ " 2019-07-17 19:36:55.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " DUN1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:37:43.128343 | \n",
+ " 2019-07-17 19:37:55.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " COL1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:38:48.060366 | \n",
+ " 2019-07-17 19:38:55.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " WEL1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:39:51.195451 | \n",
+ " 2019-07-17 19:40:54.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " BLO1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:41:11.459680 | \n",
+ " 2019-07-17 19:42:16.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " ROS1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:43:52.550491 | \n",
+ " 2019-07-17 19:43:52.550491 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " SUM1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:44:44.792640 | \n",
+ " 2019-07-17 19:44:57.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " STC1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:46:45.399269 | \n",
+ " 2019-07-17 19:46:45.399269 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " DAV1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:47:59.000000 | \n",
+ " 2019-07-17 19:47:59.000000 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " EGL1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:50:20.980434 | \n",
+ " 2019-07-17 19:51:59.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " LAW1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:55:18.706011 | \n",
+ " 2019-07-17 19:55:59.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " YKM1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 19:59:07.000000 | \n",
+ " 2019-07-17 19:59:07.000000 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " SHP1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 20:02:37.078126 | \n",
+ " 2019-07-17 20:03:22.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " NYC1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 20:05:18.456229 | \n",
+ " 2019-07-17 20:06:00.000000 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " FIN1 | \n",
+ " 17803 | \n",
+ " 2019-07-17 20:09:45.090011 | \n",
+ " 2019-07-17 20:10:35.639726 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " 155 | \n",
+ " YUS | \n",
+ " North | \n",
+ " FIN2 | \n",
+ " 17803 | \n",
+ " 2019-07-17 20:09:45.090011 | \n",
+ " 2019-07-17 20:10:35.639726 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trainid subwayline traindirection station_char trip_id \\\n",
+ "0 155 YUS North VMC1 17803 \n",
+ "1 155 YUS North HWY1 17803 \n",
+ "2 155 YUS North PVL1 17803 \n",
+ "3 155 YUS North YUN1 17803 \n",
+ "4 155 YUS North FIW1 17803 \n",
+ "5 155 YUS North DNP1 17803 \n",
+ "6 155 YUS North SHW1 17803 \n",
+ "7 155 YUS North WIL1 17803 \n",
+ "8 155 YUS North YKD1 17803 \n",
+ "9 155 YUS North LWW1 17803 \n",
+ "10 155 YUS North GCN1 17803 \n",
+ "11 155 YUS North CVL1 17803 \n",
+ "12 155 YUS North SCW1 17803 \n",
+ "13 155 YUS North DUP1 17803 \n",
+ "14 155 YUS North SPA1 17803 \n",
+ "15 155 YUS North SGU1 17803 \n",
+ "16 155 YUS North MUS1 17803 \n",
+ "17 155 YUS North QPK1 17803 \n",
+ "18 155 YUS North STP1 17803 \n",
+ "19 155 YUS North OSG1 17803 \n",
+ "20 155 YUS North STA1 17803 \n",
+ "21 155 YUS North UNI1 17803 \n",
+ "22 155 YUS North KNG1 17803 \n",
+ "23 155 YUS North QUN1 17803 \n",
+ "24 155 YUS North DUN1 17803 \n",
+ "25 155 YUS North COL1 17803 \n",
+ "26 155 YUS North WEL1 17803 \n",
+ "27 155 YUS North BLO1 17803 \n",
+ "28 155 YUS North ROS1 17803 \n",
+ "29 155 YUS North SUM1 17803 \n",
+ "30 155 YUS North STC1 17803 \n",
+ "31 155 YUS North DAV1 17803 \n",
+ "32 155 YUS North EGL1 17803 \n",
+ "33 155 YUS North LAW1 17803 \n",
+ "34 155 YUS North YKM1 17803 \n",
+ "35 155 YUS North SHP1 17803 \n",
+ "36 155 YUS North NYC1 17803 \n",
+ "37 155 YUS North FIN1 17803 \n",
+ "38 155 YUS North FIN2 17803 \n",
+ "\n",
+ " estimated_arrival estimated_departure exact_arr exact_dep \n",
+ "0 2019-07-17 18:51:04.000000 2019-07-17 18:52:04.000000 1 1 \n",
+ "1 2019-07-17 18:54:05.000000 2019-07-17 18:54:05.000000 1 1 \n",
+ "2 2019-07-17 18:56:07.000000 2019-07-17 18:56:07.000000 1 1 \n",
+ "3 2019-07-17 18:58:01.000000 2019-07-17 18:58:01.000000 1 1 \n",
+ "4 2019-07-17 19:00:03.000000 2019-07-17 19:00:03.000000 1 1 \n",
+ "5 2019-07-17 19:03:02.000000 2019-07-17 19:03:02.000000 1 1 \n",
+ "6 2019-07-17 19:04:59.585143 2019-07-17 19:04:59.585143 0 0 \n",
+ "7 2019-07-17 19:07:58.497143 2019-07-17 19:07:58.497143 0 0 \n",
+ "8 2019-07-17 19:09:47.000000 2019-07-17 19:09:47.000000 1 1 \n",
+ "9 2019-07-17 19:12:32.515429 2019-07-17 19:12:47.000000 0 1 \n",
+ "10 2019-07-17 19:14:41.849143 2019-07-17 19:14:41.849143 0 0 \n",
+ "11 2019-07-17 19:16:20.573714 2019-07-17 19:16:20.573714 0 0 \n",
+ "12 2019-07-17 19:19:50.000000 2019-07-17 19:19:50.000000 1 1 \n",
+ "13 2019-07-17 19:22:28.628571 2019-07-17 19:22:28.628571 0 0 \n",
+ "14 2019-07-17 19:23:24.290286 2019-07-17 19:23:48.000000 0 1 \n",
+ "15 2019-07-17 19:25:05.976000 2019-07-17 19:25:17.000000 0 1 \n",
+ "16 2019-07-17 19:26:49.352000 2019-07-17 19:26:59.144000 0 0 \n",
+ "17 2019-07-17 19:28:52.000000 2019-07-17 19:28:52.000000 1 1 \n",
+ "18 2019-07-17 19:30:18.006857 2019-07-17 19:30:18.006857 0 0 \n",
+ "19 2019-07-17 19:31:15.858286 2019-07-17 19:31:15.858286 0 0 \n",
+ "20 2019-07-17 19:32:08.936686 2019-07-17 19:32:08.936686 0 0 \n",
+ "21 2019-07-17 19:33:33.233600 2019-07-17 19:33:52.000000 0 1 \n",
+ "22 2019-07-17 19:34:40.618994 2019-07-17 19:35:53.000000 0 1 \n",
+ "23 2019-07-17 19:36:45.517463 2019-07-17 19:36:55.000000 0 1 \n",
+ "24 2019-07-17 19:37:43.128343 2019-07-17 19:37:55.000000 0 1 \n",
+ "25 2019-07-17 19:38:48.060366 2019-07-17 19:38:55.000000 0 1 \n",
+ "26 2019-07-17 19:39:51.195451 2019-07-17 19:40:54.000000 0 1 \n",
+ "27 2019-07-17 19:41:11.459680 2019-07-17 19:42:16.000000 0 1 \n",
+ "28 2019-07-17 19:43:52.550491 2019-07-17 19:43:52.550491 0 0 \n",
+ "29 2019-07-17 19:44:44.792640 2019-07-17 19:44:57.000000 0 1 \n",
+ "30 2019-07-17 19:46:45.399269 2019-07-17 19:46:45.399269 0 0 \n",
+ "31 2019-07-17 19:47:59.000000 2019-07-17 19:47:59.000000 1 1 \n",
+ "32 2019-07-17 19:50:20.980434 2019-07-17 19:51:59.000000 0 1 \n",
+ "33 2019-07-17 19:55:18.706011 2019-07-17 19:55:59.000000 0 1 \n",
+ "34 2019-07-17 19:59:07.000000 2019-07-17 19:59:07.000000 1 1 \n",
+ "35 2019-07-17 20:02:37.078126 2019-07-17 20:03:22.000000 0 1 \n",
+ "36 2019-07-17 20:05:18.456229 2019-07-17 20:06:00.000000 0 1 \n",
+ "37 2019-07-17 20:09:45.090011 2019-07-17 20:10:35.639726 0 0 \n",
+ "38 2019-07-17 20:09:45.090011 2019-07-17 20:10:35.639726 0 0 "
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sql = '''WITH inferred_trips AS(SELECT trip_id, COUNT(1) as stops\n",
+ "FROM test_day_final\n",
+ "GROUP BY trip_id\n",
+ "HAVING COUNT(1) > 38\n",
+ "LIMIT 1)\n",
+ "SELECT test_day_final.* \n",
+ "FROM test_day_final\n",
+ "INNER JOIN inferred_trips USING (trip_id)\n",
+ "ORDER BY estimated_arrival'''\n",
+ "pandasql.read_sql(sql, con)"
]
},
{