Skip to content

Commit 730b882

Browse files
committed
Poc to have typed object indexes
Same as #123 but without generational_indexes. I think the indexes are really great for performance since: * all structures are then `Vector` (great random access and cache friendly iterations) * the ids are short (integer vs string) I don't think we need those performance in this crate though. Having only typed index (a think wrapper over the real gtfs identifier) would be more convenient I think. It would: * ease debug (easy to print the gtfs identifier, instead of having a meaningless integer) * ease serialisation (same, we can serialize the string right away) * be a more more close to the gtfs representation This is still *very* early WIP, I'm not convinced at all by the ergonomics, I'd like to keep the property of the Index in #123 that the `Id` have at least existed at one point (even if I don't plan to ensure this if an object is deleted).
1 parent 90db386 commit 730b882

File tree

5 files changed

+90
-61
lines changed

5 files changed

+90
-61
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ read-url = ["reqwest", "futures"]
1414
[dependencies]
1515
bytes = "1"
1616
csv = "1.1"
17-
derivative = "2.1"
17+
derivative = "2.2.0"
1818
serde = "1.0"
1919
serde_derive = "1.0"
2020
chrono = "0.4"

src/gtfs.rs

Lines changed: 32 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
use crate::{objects::*, Error, RawGtfs};
1+
use crate::{objects::*, Error, Id, RawGtfs};
22
use chrono::prelude::NaiveDate;
33
use chrono::Duration;
44
use std::collections::{HashMap, HashSet};
55
use std::convert::TryFrom;
6-
use std::sync::Arc;
76

87
/// Data structure with all the GTFS objects
98
///
@@ -27,8 +26,8 @@ pub struct Gtfs {
2726
pub calendar: HashMap<String, Calendar>,
2827
/// All calendar dates grouped by service_id
2928
pub calendar_dates: HashMap<String, Vec<CalendarDate>>,
30-
/// All stop by `stop_id`. Stops are in an [Arc] because they are also referenced by each [StopTime]
31-
pub stops: HashMap<String, Arc<Stop>>,
29+
/// All stop by `stop_id`
30+
pub stops: HashMap<Id<Stop>, Stop>,
3231
/// All routes by `route_id`
3332
pub routes: HashMap<String, Route>,
3433
/// All trips by `trip_id`
@@ -49,12 +48,12 @@ impl TryFrom<RawGtfs> for Gtfs {
4948
///
5049
/// It might fail if some mandatory files couldn’t be read or if there are references to other objects that are invalid.
5150
fn try_from(raw: RawGtfs) -> Result<Gtfs, Error> {
51+
let frequencies = raw.frequencies.unwrap_or_else(|| Ok(Vec::new()))?;
5252
let stops = to_stop_map(
5353
raw.stops?,
5454
raw.transfers.unwrap_or_else(|| Ok(Vec::new()))?,
5555
raw.pathways.unwrap_or(Ok(Vec::new()))?,
5656
)?;
57-
let frequencies = raw.frequencies.unwrap_or_else(|| Ok(Vec::new()))?;
5857
let trips = create_trips(raw.trips?, raw.stop_times?, frequencies, &stops)?;
5958

6059
Ok(Gtfs {
@@ -175,10 +174,10 @@ impl Gtfs {
175174
}
176175

177176
/// Gets a [Stop] by its `stop_id`
178-
pub fn get_stop<'a>(&'a self, id: &str) -> Result<&'a Stop, Error> {
177+
pub fn get_stop<'a>(&'a self, id: &Id<Stop>) -> Result<&'a Stop, Error> {
179178
match self.stops.get(id) {
180179
Some(stop) => Ok(stop),
181-
None => Err(Error::ReferenceError(id.to_owned())),
180+
None => Err(Error::ReferenceError(id.to_string())),
182181
}
183182
}
184183

@@ -225,7 +224,7 @@ impl Gtfs {
225224
}
226225
}
227226

228-
fn to_map<O: Id>(elements: impl IntoIterator<Item = O>) -> HashMap<String, O> {
227+
fn to_map<O: WithId>(elements: impl IntoIterator<Item = O>) -> HashMap<String, O> {
229228
elements
230229
.into_iter()
231230
.map(|e| (e.id().to_owned(), e))
@@ -236,33 +235,35 @@ fn to_stop_map(
236235
stops: Vec<Stop>,
237236
raw_transfers: Vec<RawTransfer>,
238237
raw_pathways: Vec<RawPathway>,
239-
) -> Result<HashMap<String, Arc<Stop>>, Error> {
240-
let mut stop_map: HashMap<String, Stop> =
241-
stops.into_iter().map(|s| (s.id.clone(), s)).collect();
242-
238+
) -> Result<HashMap<Id<Stop>, Stop>, Error> {
239+
let mut stop_map = HashMap::<Id<Stop>, Stop>::default();
240+
for s in stops.into_iter() {
241+
stop_map.insert(Id::must_exists(s.id.clone()), s);
242+
}
243243
for transfer in raw_transfers {
244+
// Note: I'm not convinced at all by this Id::must_exists...
245+
// we shouldn't have to allocate here, and we must find a way to really ensure that the id exists (or remove the verbosity)
244246
stop_map
245-
.get(&transfer.to_stop_id)
247+
.get(&Id::must_exists(transfer.to_stop_id.clone()))
246248
.ok_or_else(|| Error::ReferenceError(transfer.to_stop_id.to_string()))?;
247-
stop_map
248-
.entry(transfer.from_stop_id.clone())
249-
.and_modify(|stop| stop.transfers.push(StopTransfer::from(transfer)));
249+
let to_stop_id = Id::must_exists(transfer.to_stop_id.clone());
250+
let s = stop_map
251+
.get_mut(&Id::must_exists(transfer.from_stop_id.clone()))
252+
.ok_or_else(|| Error::ReferenceError(transfer.from_stop_id.to_string()))?;
253+
s.transfers.push(StopTransfer::from((transfer, to_stop_id)));
250254
}
251255

252256
for pathway in raw_pathways {
253257
stop_map
254-
.get(&pathway.to_stop_id)
258+
.get(&Id::must_exists(pathway.to_stop_id.clone()))
255259
.ok_or_else(|| Error::ReferenceError(pathway.to_stop_id.to_string()))?;
256-
stop_map
257-
.entry(pathway.from_stop_id.clone())
258-
.and_modify(|stop| stop.pathways.push(Pathway::from(pathway)));
260+
let s = stop_map
261+
.get_mut(&Id::must_exists(pathway.from_stop_id.clone()))
262+
.ok_or_else(|| Error::ReferenceError(pathway.to_stop_id.to_string()))?;
263+
let stop_id = Id::must_exists(pathway.to_stop_id.clone());
264+
s.pathways.push(Pathway::from((pathway, stop_id)));
259265
}
260-
261-
let res = stop_map
262-
.into_iter()
263-
.map(|(i, s)| (i, Arc::new(s)))
264-
.collect();
265-
Ok(res)
266+
Ok(stop_map)
266267
}
267268

268269
fn to_shape_map(shapes: Vec<Shape>) -> HashMap<String, Vec<Shape>> {
@@ -292,7 +293,7 @@ fn create_trips(
292293
raw_trips: Vec<RawTrip>,
293294
raw_stop_times: Vec<RawStopTime>,
294295
raw_frequencies: Vec<RawFrequency>,
295-
stops: &HashMap<String, Arc<Stop>>,
296+
stops: &HashMap<Id<Stop>, Stop>,
296297
) -> Result<HashMap<String, Trip>, Error> {
297298
let mut trips = to_map(raw_trips.into_iter().map(|rt| Trip {
298299
id: rt.id,
@@ -312,10 +313,11 @@ fn create_trips(
312313
let trip = &mut trips
313314
.get_mut(&s.trip_id)
314315
.ok_or_else(|| Error::ReferenceError(s.trip_id.to_string()))?;
316+
let stop_id = Id::must_exists(s.stop_id.clone());
315317
let stop = stops
316-
.get(&s.stop_id)
317-
.ok_or_else(|| Error::ReferenceError(s.stop_id.to_string()))?;
318-
trip.stop_times.push(StopTime::from(&s, Arc::clone(stop)));
318+
.get(&stop_id)
319+
.ok_or_else(|| Error::ReferenceError(stop_id.to_string()))?;
320+
trip.stop_times.push(StopTime::from(&s, stop_id));
319321
}
320322

321323
for trip in &mut trips.values_mut() {

src/id.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
use core::marker::PhantomData;
2+
#[derive(Derivative)]
3+
#[derive(Serialize, Deserialize)]
4+
#[derivative(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
5+
pub struct Id<T> {
6+
id: String,
7+
#[serde(skip)]
8+
#[derivative(Debug(bound = ""))]
9+
#[derivative(Debug = "ignore")]
10+
#[derivative(Clone(bound = ""))]
11+
#[derivative(Eq(bound = ""))]
12+
#[derivative(PartialEq(bound = ""))]
13+
#[derivative(Hash(bound = ""))]
14+
_phantom: PhantomData<T>,
15+
}
16+
17+
impl<T> Id<T> {
18+
pub fn must_exists(s: String) -> Id<T> {
19+
Id {
20+
id: s,
21+
_phantom: PhantomData,
22+
}
23+
}
24+
}
25+
26+
impl<T> std::ops::Deref for Id<T> {
27+
type Target = str;
28+
fn deref(&self) -> &str {
29+
&self.id
30+
}
31+
}

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ mod enums;
4747
pub mod error;
4848
mod gtfs;
4949
mod gtfs_reader;
50+
mod id;
5051
pub(crate) mod objects;
5152
mod raw_gtfs;
5253
mod serde_helpers;
@@ -57,5 +58,6 @@ mod tests;
5758
pub use error::Error;
5859
pub use gtfs::Gtfs;
5960
pub use gtfs_reader::GtfsReader;
61+
pub use id::Id;
6062
pub use objects::*;
6163
pub use raw_gtfs::RawGtfs;

src/objects.rs

Lines changed: 24 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
pub use crate::enums::*;
2-
use crate::serde_helpers::*;
2+
use crate::{serde_helpers::*, Id};
33
use chrono::{Datelike, NaiveDate, Weekday};
44
use rgb::RGB8;
55

@@ -9,17 +9,11 @@ use std::sync::Arc;
99
/// Objects that have an identifier implement this trait
1010
///
1111
/// Those identifier are technical and should not be shown to travellers
12-
pub trait Id {
12+
pub trait WithId {
1313
/// Identifier of the object
1414
fn id(&self) -> &str;
1515
}
1616

17-
impl<T: Id> Id for Arc<T> {
18-
fn id(&self) -> &str {
19-
self.as_ref().id()
20-
}
21-
}
22-
2317
/// Trait to introspect what is the object’s type (stop, route…)
2418
pub trait Type {
2519
/// What is the type of the object
@@ -100,7 +94,7 @@ impl Type for Calendar {
10094
}
10195
}
10296

103-
impl Id for Calendar {
97+
impl WithId for Calendar {
10498
fn id(&self) -> &str {
10599
&self.id
106100
}
@@ -199,7 +193,7 @@ impl Type for Stop {
199193
}
200194
}
201195

202-
impl Id for Stop {
196+
impl WithId for Stop {
203197
fn id(&self) -> &str {
204198
&self.id
205199
}
@@ -258,14 +252,14 @@ pub struct RawStopTime {
258252
}
259253

260254
/// The moment where a vehicle, running on [Trip] stops at a [Stop]. See <https://gtfs.org/reference/static/#stopstxt>
261-
#[derive(Clone, Debug, Default)]
255+
#[derive(Clone, Debug)]
262256
pub struct StopTime {
263257
/// Arrival time of the stop time.
264258
/// It's an option since the intermediate stops can have have no arrival
265259
/// and this arrival needs to be interpolated
266260
pub arrival_time: Option<u32>,
267261
/// [Stop] where the vehicle stops
268-
pub stop: Arc<Stop>,
262+
pub stop: Id<Stop>,
269263
/// Departure time of the stop time.
270264
/// It's an option since the intermediate stops can have have no departure
271265
/// and this departure needs to be interpolated
@@ -290,7 +284,7 @@ pub struct StopTime {
290284

291285
impl StopTime {
292286
/// Creates [StopTime] by linking a [RawStopTime::stop_id] to the actual [Stop]
293-
pub fn from(stop_time_gtfs: &RawStopTime, stop: Arc<Stop>) -> Self {
287+
pub fn from(stop_time_gtfs: &RawStopTime, stop: Id<Stop>) -> Self {
294288
Self {
295289
arrival_time: stop_time_gtfs.arrival_time,
296290
departure_time: stop_time_gtfs.departure_time,
@@ -362,7 +356,7 @@ impl Type for Route {
362356
}
363357
}
364358

365-
impl Id for Route {
359+
impl WithId for Route {
366360
fn id(&self) -> &str {
367361
&self.id
368362
}
@@ -412,7 +406,7 @@ impl Type for RawTrip {
412406
}
413407
}
414408

415-
impl Id for RawTrip {
409+
impl WithId for RawTrip {
416410
fn id(&self) -> &str {
417411
&self.id
418412
}
@@ -463,7 +457,7 @@ impl Type for Trip {
463457
}
464458
}
465459

466-
impl Id for Trip {
460+
impl WithId for Trip {
467461
fn id(&self) -> &str {
468462
&self.id
469463
}
@@ -514,7 +508,7 @@ impl Type for Agency {
514508
}
515509
}
516510

517-
impl Id for Agency {
511+
impl WithId for Agency {
518512
fn id(&self) -> &str {
519513
match &self.id {
520514
None => "",
@@ -555,7 +549,7 @@ impl Type for Shape {
555549
}
556550
}
557551

558-
impl Id for Shape {
552+
impl WithId for Shape {
559553
fn id(&self) -> &str {
560554
&self.id
561555
}
@@ -582,7 +576,7 @@ pub struct FareAttribute {
582576
pub transfer_duration: Option<usize>,
583577
}
584578

585-
impl Id for FareAttribute {
579+
impl WithId for FareAttribute {
586580
fn id(&self) -> &str {
587581
&self.id
588582
}
@@ -655,22 +649,22 @@ pub struct RawTransfer {
655649
pub min_transfer_time: Option<u32>,
656650
}
657651

658-
#[derive(Debug, Default, Clone)]
652+
#[derive(Debug, Clone)]
659653
/// Transfer information between stops
660654
pub struct StopTransfer {
661655
/// Stop which to transfer to
662-
pub to_stop_id: String,
656+
pub to_stop_id: Id<Stop>,
663657
/// Type of the transfer
664658
pub transfer_type: TransferType,
665659
/// Minimum time needed to make the transfer in seconds
666660
pub min_transfer_time: Option<u32>,
667661
}
668662

669-
impl From<RawTransfer> for StopTransfer {
663+
impl From<(RawTransfer, Id<Stop>)> for StopTransfer {
670664
/// Converts from a [RawTransfer] to a [StopTransfer]
671-
fn from(transfer: RawTransfer) -> Self {
665+
fn from((transfer, to_stop_id): (RawTransfer, Id<Stop>)) -> Self {
672666
Self {
673-
to_stop_id: transfer.to_stop_id,
667+
to_stop_id: to_stop_id,
674668
transfer_type: transfer.transfer_type,
675669
min_transfer_time: transfer.min_transfer_time,
676670
}
@@ -755,12 +749,12 @@ pub struct RawPathway {
755749
}
756750

757751
/// Pathway going from a stop to another.
758-
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
752+
#[derive(Debug, Clone, Serialize, Deserialize)]
759753
pub struct Pathway {
760754
/// Uniquely identifies the pathway
761755
pub id: String,
762756
/// Location at which the pathway ends
763-
pub to_stop_id: String,
757+
pub to_stop_id: Id<Stop>,
764758
/// Type of pathway between the specified (from_stop_id, to_stop_id) pair
765759
pub mode: PathwayMode,
766760
/// Indicates in which direction the pathway can be used
@@ -781,18 +775,18 @@ pub struct Pathway {
781775
pub reversed_signposted_as: Option<String>,
782776
}
783777

784-
impl Id for Pathway {
778+
impl WithId for Pathway {
785779
fn id(&self) -> &str {
786780
&self.id
787781
}
788782
}
789783

790-
impl From<RawPathway> for Pathway {
784+
impl From<(RawPathway, Id<Stop>)> for Pathway {
791785
/// Converts from a [RawPathway] to a [Pathway]
792-
fn from(raw: RawPathway) -> Self {
786+
fn from((raw, to_stop_id): (RawPathway, Id<Stop>)) -> Self {
793787
Self {
794788
id: raw.id,
795-
to_stop_id: raw.to_stop_id,
789+
to_stop_id: to_stop_id,
796790
mode: raw.mode,
797791
is_bidirectional: raw.is_bidirectional,
798792
length: raw.length,

0 commit comments

Comments
 (0)