-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinterpolate.php
255 lines (215 loc) · 8.41 KB
/
interpolate.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
<?php
/*
* Script to interpolate minor stop times using the surrounding
* major stops.
*
* This takes a stop_times.txt file from a google transit feed
* and outputs a new file stop_times_interpolated.txt with the
* missing stop times filled in.
*
* Original use was for the Wellington GTF when they stopped
* including minor stop times.
*
* Interpolates using the shape_dist_traveled assuming the buses
* are moving at a constant speed. Interpolated stops are given
* the same departure and arrival time and times are rounded to
* the nearest minute.
*
* Output file should be the same as the input, but with the
* missing times added. Windows line endings ('\r\n') are
* retained.
*
* The script will exit with an error if it encounters a problem.
*
* If a row contains only one of arrival_time or departure_time,
* it will treat it like a minor stop and recalculate both
* values.
*
* @author Simon Coggins
*
*/
$input = 'stop_times.txt';
$output = 'stop_times_interpolated.txt';

// Columns the interpolation reads by name; the script cannot work without them.
$required_columns = array('trip_id', 'arrival_time', 'departure_time', 'shape_dist_traveled');

// Open the input first so that a missing input file does not leave an
// empty output file behind.
$rh = fopen($input, "r");
if (!$rh) {
    echo "Could not open $input for reading.\n";
    exit(1);
}
$wh = fopen($output, "w");
if (!$wh) {
    echo "Could not open $output for writing.\n";
    fclose($rh);
    exit(1);
}

// Report a fatal problem, release both file handles and stop with a
// non-zero exit status so that calling scripts can detect the failure.
$abort = function ($message) use ($rh, $wh) {
    echo $message;
    fclose($rh);
    fclose($wh);
    exit(1);
};

// No length limit is passed to fgets(): a limit would split long rows
// into two separate records.
$firstrow = fgets($rh);
if ($firstrow === false) {
    $abort("Failed to get first row!\n");
}

// get the header columns and output them to the file unchanged
$headings = explode(',', trim($firstrow));
write_row($wh, $headings);

// A UTF-8 byte order mark would otherwise become part of the first column
// name and break lookups by name. Strip it from the keys only; the header
// written above keeps it.
if (strncmp($headings[0], "\xEF\xBB\xBF", 3) === 0) {
    $headings[0] = (string) substr($headings[0], 3);
}

$missing_columns = array_diff($required_columns, $headings);
if (!empty($missing_columns)) {
    $abort("Input is missing required column(s): " . implode(', ', $missing_columns) . "\n");
}

$last_timing_row = null;           // the most recent row that carried times
$records_to_interpolate = array(); // untimed rows waiting for the next timing row

// read in and process one row at a time
while (($buffer = fgets($rh)) !== false) {
    $buffer = trim($buffer);
    if ($buffer === '') {
        // a blank line is not a stop; do not turn it into a record
        continue;
    }

    // read this row into an associative array using the heading row as keys
    $line = explode(',', $buffer);
    $row = array();
    foreach ($headings as $heading) {
        $row[$heading] = array_shift($line);
    }

    // A row with only one of the two times is treated like a minor stop
    // and has both values recalculated (see the file header).
    $times_missing = (empty($row['arrival_time']) || empty($row['departure_time']));

    // nothing is waiting to be fixed and this row has its times: output
    // it, remember it as the latest timing row and carry on
    if (empty($records_to_interpolate) && !$times_missing) {
        $last_timing_row = $row;
        write_row($wh, $row);
        continue;
    }

    // this row needs times: queue it until the next timing row is found
    if ($times_missing) {
        $records_to_interpolate[] = $row;
        continue;
    }

    // If we get here we've found the next timing row after one or more
    // rows with missing times - we need to interpolate.
    // Sanity check that we're still on the same trip. If not then the trip
    // didn't have timing points at the start/end.
    if ($last_timing_row === null || $last_timing_row['trip_id'] != $row['trip_id']) {
        $abort(
            "Looks like there's a trip without timing data at the start and/or end!\n"
            . implode(',', $row) . "\n"
        );
    }

    // get time between the two timing stops
    $total_time = get_time_diff($last_timing_row['departure_time'], $row['arrival_time']);
    if ($total_time === false) {
        $abort(
            "Problem parsing the arrival or departure time from a timing row\n"
            . "Timing departure: {$last_timing_row['departure_time']}\n"
            . "Timing arrival: {$row['arrival_time']}\n"
        );
    }

    // get distance between the two timing stops
    $start_dist = $last_timing_row['shape_dist_traveled'];
    $end_dist = $row['shape_dist_traveled'];
    if (!is_numeric($start_dist) || !is_numeric($end_dist)) {
        $abort(
            "Problem reading shape_dist_traveled from a timing row\n"
            . implode(',', $row) . "\n"
        );
    }
    $total_dist = $end_dist - $start_dist;
    if ($total_dist == 0) {
        // the interpolation divides by this distance
        $abort(
            "Timing stops have the same shape_dist_traveled, cannot interpolate\n"
            . implode(',', $row) . "\n"
        );
    }

    // For each stop to interpolate, the calculation is:
    // last timing stop departure time + ( (dist to this stop * total time ) / total dist )
    foreach ($records_to_interpolate as $missing_record) {
        if (!is_numeric($missing_record['shape_dist_traveled'])) {
            $abort(
                "Problem reading shape_dist_traveled from a row without times\n"
                . implode(',', $missing_record) . "\n"
            );
        }
        $dist_to_this_stop = $missing_record['shape_dist_traveled'] - $start_dist;
        $time_to_this_stop = ($dist_to_this_stop * $total_time) / $total_dist;

        // calculate arrival time
        $arrival_time = add_to_time($last_timing_row['departure_time'], $time_to_this_stop);
        if ($arrival_time === false) {
            $abort(
                "Problem parsing the departure time from a timing row\n"
                . "Timing departure: {$last_timing_row['departure_time']}\n"
            );
        }

        // interpolated stops get the same arrival and departure time
        $missing_record['arrival_time'] = $arrival_time;
        $missing_record['departure_time'] = $arrival_time;
        write_row($wh, $missing_record);
    }

    // reset the queue for the next interpolation
    $records_to_interpolate = array();

    // write out the timing row and save it for next time round the loop
    write_row($wh, $row);
    $last_timing_row = $row;
}

if (!feof($rh)) {
    $abort("Error: unexpected fgets() fail\n");
}

// Rows still queued at the end of the file never met a closing timing row;
// report them instead of silently dropping them from the output.
if (!empty($records_to_interpolate)) {
    $abort(
        "Looks like there's a trip without timing data at the start and/or end!\n"
        . implode(',', $records_to_interpolate[0]) . "\n"
    );
}

fclose($rh);
fclose($wh);
/**
 * Outputs one record as a comma separated line.
 *
 * The values are joined with commas in array order (keys are ignored) and
 * the line is terminated with a Windows line ending.
 *
 * @param resource $wh  Open, writable file handle
 * @param array    $row Values making up the record
 *
 */
function write_row($wh, $row) {
    $fields = implode(',', $row);
    $record = $fields . "\r\n";
    fwrite($wh, $record);
}
/**
 * Given a time in HH:MM:SS format and a number of seconds, calculate a new
 * HH:MM:SS time rounded to the nearest minute.
 *
 * If the additional seconds cause the time to pass midnight, the hours keep
 * counting up instead of wrapping back to 00:00:00. This matches how GTFS
 * writes late night trips, e.g.:
 * Wednesday at 00:01:00 is early Wednesday morning
 * Wednesday at 24:01:00 is early Thursday morning
 *
 * A single digit hour (H:MM:SS) is accepted as well; the result is always
 * written with at least two hour digits and with the seconds set to 00.
 *
 * @param string    $time Time in HH:MM:SS (or H:MM:SS) format
 * @param int|float $secs Number of seconds to add
 *
 * @return string|false New time in HH:MM:SS or false if parsing failed
 */
function add_to_time($time, $secs) {
    // preg_match() returns 0 (not false) when the pattern does not match,
    // so anything other than exactly one match is a parse failure.
    if (!is_string($time)
        || preg_match('/^(\d{1,3}):(\d{2}):(\d{2})$/', $time, $matches) !== 1) {
        return false;
    }

    $time_in_seconds = (int) $matches[1] * 3600 + (int) $matches[2] * 60 + (int) $matches[3];
    $newtime = $time_in_seconds + $secs;

    // convert back to HH:MM:SS but allowing hours to exceed 23
    // calculate and subtract the whole hours
    $hours = floor($newtime / 3600);
    $newtime -= $hours * 3600;

    // calculate and subtract the whole minutes
    $minutes = floor($newtime / 60);
    $newtime -= $minutes * 60;

    // round to the nearest minute: a remainder of 30 - 59 seconds bumps
    // the minutes, a remainder of 0 - 29 seconds is dropped
    if ($newtime >= 30) {
        $minutes++;
        // handle minutes rolling over into the next hour
        if ($minutes == 60) {
            $hours++;
            $minutes = 0;
        }
    }

    // seconds are always zero after rounding
    return sprintf('%02d:%02d:%02d', $hours, $minutes, 0);
}
/**
 * Given two times in HH:MM:SS format, return the number of seconds between them.
 *
 * Hours may exceed 23 (e.g. 25:10:00), which is how GTFS writes times after
 * midnight on a service day that started the day before. A single digit
 * hour (H:MM:SS) is accepted as well.
 *
 * If t2 is earlier than t1 by less than a day, t2 is assumed to have
 * occurred on the next day.
 *
 * @param string $t1 Time in HH:MM:SS
 * @param string $t2 Time in HH:MM:SS
 *
 * @return integer|false Number of seconds or false if time is in wrong format
 */
function get_time_diff($t1, $t2) {
    // Parse both times by hand. strtotime() cannot read hours of 25 or
    // more and it accepts an empty time as midnight, so it is not used.
    $seconds = array();
    foreach (array($t1, $t2) as $time) {
        if (!is_string($time)
            || preg_match('/^\s*(\d{1,3}):([0-5]\d):([0-5]\d)\s*$/', $time, $parts) !== 1) {
            // failed to parse times
            return false;
        }
        $seconds[] = (int) $parts[1] * 3600 + (int) $parts[2] * 60 + (int) $parts[3];
    }
    list($time1, $time2) = $seconds;

    // if time1 appears to be later than time2 we may be working across a
    // date boundary, e.g. around midnight: add a day to time2
    if ($time1 > $time2 && $time1 - $time2 < 86400) {
        $time2 += 86400;
    }

    return $time2 - $time1;
}