Skip to content

Commit

Permalink
clean(reader-gtfs): removed unused code for writing GTFS feeds
Browse files Browse the repository at this point in the history
  • Loading branch information
kschrab committed Dec 4, 2024
1 parent c5b46c2 commit c5ee210
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 241 deletions.
248 changes: 54 additions & 194 deletions reader-gtfs/src/main/java/com/conveyal/gtfs/model/Entity.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import com.conveyal.gtfs.GTFSFeed;
import com.conveyal.gtfs.error.*;
import com.conveyal.gtfs.util.CsvReader;
import com.conveyal.gtfs.util.CsvWriter;
import org.apache.commons.io.input.BOMInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -44,8 +43,6 @@
import java.util.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;


/**
* An abstract base class that represents a row in a GTFS table, e.g. a Stop, Trip, or Agency.
Expand All @@ -72,15 +69,17 @@ public static abstract class Loader<E extends Entity> {
protected final Set<String> missingRequiredColumns = new HashSet<>();

protected CsvReader reader;
protected long row;
protected long row;
// TODO "String column" that is set before any calls to avoid passing around the column name

/**
 * Creates a loader bound to one feed and one table.
 *
 * @param feed      the feed this loader stores its results and errors in
 * @param tableName the name of the table this loader reads
 */
public Loader(GTFSFeed feed, String tableName) {
    this.tableName = tableName;
    this.feed = feed;
}

/** @return whether the number actual is in the range [min, max] */
/**
* @return whether the number actual is in the range [min, max]
*/
protected boolean checkRangeInclusive(double min, double max, double actual) {
if (actual < min || actual > max) {
feed.errors.add(new RangeError(tableName, row, null, min, max, actual)); // TODO set column name in loader so it's available in methods
Expand All @@ -95,6 +94,7 @@ protected boolean checkRangeInclusive(double min, double max, double actual) {
* I was originally just calling getStringField from the other getXField functions as a first step to get
* the missing-field check. But we don't want deduplication performed on strings that aren't being retained.
* Therefore the missing-field behavior is this separate function.
*
* @return null if column was missing or field is empty
*/
private String getFieldCheckRequired(String column, boolean required) throws IOException {
Expand All @@ -113,7 +113,9 @@ private String getFieldCheckRequired(String column, boolean required) throws IOE
return str;
}

/**
 * Reads the given column of the current row as a String.
 *
 * NOTE(review): earlier documentation described the result as "deduplicated"; this method itself only
 * delegates to the required-field check, so confirm whether deduplication still happens anywhere.
 *
 * @param column   name of the column to read
 * @param required whether the field is mandatory; forwarded to the required-field check
 * @return the value produced by the required-field check for that column
 * @throws IOException if the underlying read fails
 */
protected String getStringField(String column, boolean required) throws IOException {
    final String value = getFieldCheckRequired(column, required);
    return value;
}
Expand All @@ -123,38 +125,40 @@ protected int getIntField(String column, boolean required, int min, int max) thr
}


protected int getIntField (String column, boolean required, int min, int max, int defaultValue) throws IOException {
Map<Integer, Integer> mapping = null;
return getIntField (column, required, min, max, defaultValue, mapping);
protected int getIntField(String column, boolean required, int min, int max, int defaultValue) throws IOException {
Map<Integer, Integer> mapping = null;
return getIntField(column, required, min, max, defaultValue, mapping);
}

protected int getIntField(String column, boolean required, int min, int max, int defaultValue, final Map<Integer, Integer> mapping) throws IOException {
String str = getFieldCheckRequired(column, required);
int val = INT_MISSING;
if (str == null) {
val = defaultValue; // defaults to 0 per overloaded function, unless provided.
} else try {
val = Integer.parseInt(str);
if (mapping != null) {
Integer mappedVal = mapping.get(val);
if (mappedVal != null)
val = mappedVal;
} else {
try {
val = Integer.parseInt(str);
if (mapping != null) {
Integer mappedVal = mapping.get(val);
if (mappedVal != null) {val = mappedVal;}
}
checkRangeInclusive(min, max, val);
} catch (NumberFormatException nfe) {
feed.errors.add(new NumberParseError(tableName, row, column));
}
checkRangeInclusive(min, max, val);
} catch (NumberFormatException nfe) {
feed.errors.add(new NumberParseError(tableName, row, column));
}
return val;
}

/**
* Fetch the given column of the current row, and interpret it as a time in the format HH:MM:SS.
*
* @return the time value in seconds since midnight
*/
protected int getTimeField(String column, boolean required) throws IOException {
String str = getFieldCheckRequired(column, required);
int val = INT_MISSING;

if (str != null) {
String[] fields = str.split(":");
if (fields.length != 3) {
Expand All @@ -173,49 +177,57 @@ protected int getTimeField(String column, boolean required) throws IOException {
}
}
}

return val;
}

/**
* Fetch the given column of the current row, and interpret it as a date in the format YYYYMMDD.
*
* @return the date value as Java LocalDate, or null if it could not be parsed.
*/
protected LocalDate getDateField(String column, boolean required) throws IOException {
String str = getFieldCheckRequired(column, required);
LocalDate dateTime = null;
if (str != null) try {
dateTime = LocalDate.parse(str, DateTimeFormatter.BASIC_ISO_DATE);
checkRangeInclusive(2000, 2100, dateTime.getYear());
} catch (IllegalArgumentException iae) {
feed.errors.add(new DateParseError(tableName, row, column));
if (str != null) {
try {
dateTime = LocalDate.parse(str, DateTimeFormatter.BASIC_ISO_DATE);
checkRangeInclusive(2000, 2100, dateTime.getYear());
} catch (IllegalArgumentException iae) {
feed.errors.add(new DateParseError(tableName, row, column));
}
}
return dateTime;
}

/**
* Fetch the given column of the current row, and interpret it as a URL.
*
* @return the URL, or null if the field was missing or empty.
*/
protected URL getUrlField(String column, boolean required) throws IOException {
String str = getFieldCheckRequired(column, required);
URL url = null;
if (str != null) try {
url = new URL(str);
} catch (MalformedURLException mue) {
feed.errors.add(new URLParseError(tableName, row, column));
if (str != null) {
try {
url = new URL(str);
} catch (MalformedURLException mue) {
feed.errors.add(new URLParseError(tableName, row, column));
}
}
return url;
}

protected double getDoubleField(String column, boolean required, double min, double max) throws IOException {
String str = getFieldCheckRequired(column, required);
double val = Double.NaN;
if (str != null) try {
val = Double.parseDouble(str);
checkRangeInclusive(min, max, val);
} catch (NumberFormatException nfe) {
feed.errors.add(new NumberParseError(tableName, row, column));
if (str != null) {
try {
val = Double.parseDouble(str);
checkRangeInclusive(min, max, val);
} catch (NumberFormatException nfe) {
feed.errors.add(new NumberParseError(tableName, row, column));
}
}
return val;
}
Expand All @@ -239,7 +251,9 @@ protected <K, V> V getRefField(String column, boolean required, Map<K, V> target

/**
 * NOTE(review): presumably tells the loader whether this table must be present in the feed
 * (a "missing but it is not required" message is logged elsewhere in this class) — confirm against
 * the implementing subclasses.
 */
protected abstract boolean isRequired();

/**
 * Implemented by subclasses to read one row, produce one GTFS entity, and store that entity in a map.
 *
 * @throws IOException declared so that implementations can propagate failures from the field readers
 */
protected abstract void loadOneRow() throws IOException;

/**
Expand Down Expand Up @@ -299,130 +313,15 @@ private void missing() {
LOG.info("Table {} was missing but it is not required.", tableName);
}
}
}

/**
 * An output stream whose {@code close()} does nothing. CSVWriters try to close their output streams when
 * they are garbage-collected, which breaks if another CSV writer is still writing to the ZIP file.
 *
 * Apache Commons has something similar but it seemed silly to import another large dependency. Eventually
 * Guava will have this, see Guava issue 1367. At that point we should switch to using Guava.
 */
private static class UncloseableOutputStream extends FilterOutputStream {
    public UncloseableOutputStream(OutputStream out) {
        super(out);
    }

    /** Deliberately left empty so the wrapped stream is never closed through this wrapper. */
    @Override
    public void close() {
        // no-op
    }
}

/**
* Write this entity to a CSV file. This should be subclassed in subclasses of Entity.
* The following (abstract) methods should be overridden in a subclass:
*
* writeHeaders(): write the headers to the CsvWriter writer.
* writeRow(E): write the passed-in object to the CsvWriter writer, potentially using the write*Field methods.
* iterator(): return an iterator over objects of this class (note that the feed is available at this.feed
* public Writer (GTFSFeed feed): this should super to Writer(GTFSFeed feed, String tableName), with the table name
* defined.
*
* @author mattwigway
*/
public static abstract class Writer<E extends Entity> {
private static final Logger LOG = LoggerFactory.getLogger(Writer.class);

protected final GTFSFeed feed; // the feed into which we are loading the entities
protected final String tableName; // name of corresponding table without .txt

protected CsvWriter writer;

/**
* one-based to match reader.
*/
protected long row;

protected Writer(GTFSFeed feed, String tableName) {
this.feed = feed;
this.tableName = tableName;
}

/**
* Write the CSV header.
*/
protected abstract void writeHeaders() throws IOException;

/**
* Write one row of the CSV from the passed-in object.
*/
protected abstract void writeOneRow(E obj) throws IOException;

/**
* Get an iterator over objects of this type.
*/
protected abstract Iterator<E> iterator();

public void writeTable (ZipOutputStream zip) throws IOException {
LOG.info("Writing GTFS table {}", tableName);

ZipEntry zipEntry = new ZipEntry(tableName + ".txt");
zip.putNextEntry(zipEntry);

// don't let CSVWriter close the stream when it is garbage-collected
OutputStream protectedOut = new UncloseableOutputStream(zip);
this.writer = new CsvWriter(protectedOut, ',', Charset.forName("UTF8"));

this.writeHeaders();

// write rows until there are none left.
row = 0;
Iterator<E> iter = this.iterator();
while (iter.hasNext()) {
if (++row % 500000 == 0) {
LOG.info("Record number {}", human(row));
}

writeOneRow(iter.next());
}

// closing the writer closes the underlying output stream, so we don't do that.
writer.flush();
zip.closeEntry();

LOG.info("Wrote {} rows", human(row));
}

protected void writeStringField(String str) throws IOException {
writer.write(str);
}

protected void writeUrlField(URL obj) throws IOException {
writeStringField(obj != null ? obj.toString() : "");
}

/**
* Writes date as YYYYMMDD
*/
protected void writeDateField (LocalDate d) throws IOException {
writeStringField(d.format(DateTimeFormatter.BASIC_ISO_DATE));
}

/**
* Take a time expressed in seconds since noon - 12h (midnight, usually) and write it in HH:MM:SS format.
*/
protected void writeTimeField (int secsSinceMidnight) throws IOException {
if (secsSinceMidnight == INT_MISSING) {
writeStringField("");
return;
private String human(long n) {
if (n >= 1000000) return String.format(Locale.getDefault(), "%.1fM", n / 1000000.0);
if (n >= 1000) {return String.format(Locale.getDefault(), "%.1fk", n / 1000.0);} else {
return String.format(Locale.getDefault(), "%d", n);
}

writeStringField(convertToGtfsTime(secsSinceMidnight));
}

public static String convertToGtfsTime (int secsSinceMidnight) {
public static String convertToGtfsTime(int secsSinceMidnight) {
int seconds = secsSinceMidnight % 60;
secsSinceMidnight -= seconds;
// note that the minute and hour values are still expressed in seconds until we write it out, to avoid unnecessary division.
Expand All @@ -433,45 +332,6 @@ public static String convertToGtfsTime (int secsSinceMidnight) {
// integer divide is fine as we've subtracted off remainders
return String.format(Locale.getDefault(), "%02d:%02d:%02d", secsSinceMidnight / 3600, minutes / 60, seconds);
}

protected void writeIntField (Integer val) throws IOException {
if (val.equals(INT_MISSING))
writeStringField("");
else
writeStringField(val.toString());
}

/**
* Write a double value, with precision 10^-7. NaN is written as "".
*/
protected void writeDoubleField (double val) throws IOException {
// NaN's represent missing values
if (Double.isNaN(val))
writeStringField("");

// control file size: don't use unnecessary precision
// This is usually used for coordinates; one ten-millionth of a degree at the equator is 1.1cm,
// and smaller elsewhere on earth, plenty precise enough.
// On Jupiter, however, it's a different story.
// Use the US locale so that . is used as the decimal separator
else
writeStringField(String.format(Locale.US, "%.7f", val));
}

/**
* End a row.
* This is just a proxy to the writer, but could be used for hooks in the future.
*/
public void endRecord () throws IOException {
writer.endRecord();
}
}


// shared code between reading and writing
/**
 * Formats a count compactly: one decimal with an "M" suffix from one million upwards, one decimal
 * with a "k" suffix from one thousand upwards, plain digits otherwise.
 * Number formatting follows the JVM default locale.
 */
private static final String human(long n) {
    final Locale locale = Locale.getDefault();
    if (n >= 1000000) {
        return String.format(locale, "%.1fM", n / 1000000.0);
    } else if (n >= 1000) {
        return String.format(locale, "%.1fk", n / 1000.0);
    }
    return String.format(locale, "%d", n);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import java.io.IOException;
import java.util.Locale;

import static com.conveyal.gtfs.model.Entity.Writer.convertToGtfsTime;
import static com.conveyal.gtfs.model.Entity.Loader.convertToGtfsTime;

public class Frequency extends Entity implements Comparable<Frequency> {
/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
import java.util.HashMap;
import java.util.Map;

/**
* A supplement wrapper for the old javacsv based implementation.
*/
public class CsvReader {

private final CSVReader reader;
Expand Down
Loading

0 comments on commit c5ee210

Please sign in to comment.