diff --git a/src/main/java/com/widen/tabitha/formats/FormatAdapter.java b/src/main/java/com/widen/tabitha/formats/FormatAdapter.java
new file mode 100644
index 0000000..9c900c5
--- /dev/null
+++ b/src/main/java/com/widen/tabitha/formats/FormatAdapter.java
@@ -0,0 +1,86 @@
+package com.widen.tabitha.formats;
+
+import com.widen.tabitha.reader.ReaderOptions;
+import com.widen.tabitha.reader.RowReader;
+import com.widen.tabitha.writer.RowWriter;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+/**
+ * Provides factory methods for creating readers and writers of a particular format.
+ */
+public interface FormatAdapter {
+    /**
+     * Create a row reader for a file at the given path.
+     * <p>
+     * The default implementation opens the file as a stream and delegates to
+     * {@link #createReader(InputStream, ReaderOptions)}. If that call fails, the stream is closed before the
+     * exception propagates so the file handle is not leaked.
+     *
+     * @param path The path of the file to read.
+     * @param options Options to pass to the reader.
+     * @return A new row reader.
+     * @throws IOException if an I/O error occurs.
+     */
+    default RowReader createReader(Path path, ReaderOptions options) throws IOException {
+        InputStream inputStream = Files.newInputStream(path);
+        try {
+            return createReader(inputStream, options);
+        } catch (IOException | RuntimeException e) {
+            try {
+                inputStream.close();
+            } catch (IOException closeFailure) {
+                e.addSuppressed(closeFailure);
+            }
+            throw e;
+        }
+    }
+
+    /**
+     * Create a row reader for an input stream.
+     *
+     * @param inputStream The input stream to read.
+     * @param options Options to pass to the reader.
+     * @return A new row reader.
+     * @throws IOException if an I/O error occurs.
+     */
+    RowReader createReader(InputStream inputStream, ReaderOptions options) throws IOException;
+
+    /**
+     * Create a row writer that writes to the given path.
+     * <p>
+     * The default implementation opens the file as a stream and delegates to
+     * {@link #createWriter(OutputStream)}. If that call fails, the stream is closed before the exception
+     * propagates so the file handle is not leaked.
+     *
+     * @param path The path to write to.
+     * @return A new row writer.
+     * @throws IOException if an I/O error occurs.
+     */
+    default RowWriter createWriter(Path path) throws IOException {
+        OutputStream outputStream = Files.newOutputStream(path);
+        try {
+            return createWriter(outputStream);
+        } catch (IOException | RuntimeException e) {
+            try {
+                outputStream.close();
+            } catch (IOException closeFailure) {
+                e.addSuppressed(closeFailure);
+            }
+            throw e;
+        }
+    }
+
+    /**
+     * Create a row writer that writes to the given output stream.
+     *
+     * @param outputStream The output stream to write to.
+     * @return A new row writer.
+     * @throws IOException if an I/O error occurs.
+     */
+    RowWriter createWriter(OutputStream outputStream) throws IOException;
+}
diff --git a/src/main/java/com/widen/tabitha/formats/FormatRegistry.java b/src/main/java/com/widen/tabitha/formats/FormatRegistry.java
new file mode 100644
index 0000000..b1de3d3
--- /dev/null
+++ b/src/main/java/com/widen/tabitha/formats/FormatRegistry.java
@@ -0,0 +1,121 @@
+package com.widen.tabitha.formats;
+
+import com.widen.tabitha.formats.delimited.DelimitedFormat;
+import com.widen.tabitha.formats.delimited.DelimitedRowReader;
+import com.widen.tabitha.formats.delimited.DelimitedRowWriter;
+import com.widen.tabitha.formats.excel.WorkbookRowWriter;
+import com.widen.tabitha.formats.excel.XLSRowReader;
+import com.widen.tabitha.formats.excel.XLSXRowReader;
+import com.widen.tabitha.reader.InlineHeaderReader;
+import com.widen.tabitha.reader.ReaderOptions;
+import com.widen.tabitha.reader.RowReader;
+import com.widen.tabitha.writer.RowWriter;
+import io.reactivex.Maybe;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Path;
+
+/**
+ * Manages the adapters for the file formats supported by Tabitha.
+ * <p>
+ * You probably want to use {@link com.widen.tabitha.reader.RowReaders} or {@link com.widen.tabitha.writer.RowWriters}
+ * instead.
+ */
+public class FormatRegistry {
+    /**
+     * Get a format adapter for handling the given MIME type.
+     *
+     * @param mimeType The format MIME type; {@code null} is treated as an unknown type.
+     * @return A format adapter, or an empty {@code Maybe} if the MIME type is not supported.
+     */
+    public static Maybe<FormatAdapter> forMimeType(String mimeType) {
+        // Switching on a null string throws, so report "no adapter" explicitly instead.
+        if (mimeType == null) {
+            return Maybe.empty();
+        }
+
+        switch (mimeType) {
+            case "text/csv":
+            case "text/plain":
+                return Maybe.just(new FormatAdapter() {
+                    @Override
+                    public RowReader createReader(InputStream inputStream, ReaderOptions options) {
+                        return decorateReader(new DelimitedRowReader(inputStream, DelimitedFormat.CSV), options);
+                    }
+
+                    @Override
+                    public RowWriter createWriter(OutputStream outputStream) {
+                        return new DelimitedRowWriter(outputStream, DelimitedFormat.CSV);
+                    }
+                });
+
+            case "text/tab-separated-values":
+                return Maybe.just(new FormatAdapter() {
+                    @Override
+                    public RowReader createReader(InputStream inputStream, ReaderOptions options) {
+                        return decorateReader(new DelimitedRowReader(inputStream, DelimitedFormat.TSV), options);
+                    }
+
+                    @Override
+                    public RowWriter createWriter(OutputStream outputStream) {
+                        return new DelimitedRowWriter(outputStream, DelimitedFormat.TSV);
+                    }
+                });
+
+            case "application/vnd.ms-excel":
+                return Maybe.just(new FormatAdapter() {
+                    @Override
+                    public RowReader createReader(Path path, ReaderOptions options) throws IOException {
+                        return decorateReader(XLSRowReader.open(path, options), options);
+                    }
+
+                    @Override
+                    public RowReader createReader(InputStream inputStream, ReaderOptions options) throws IOException {
+                        return decorateReader(XLSRowReader.open(inputStream, options), options);
+                    }
+
+                    @Override
+                    public RowWriter createWriter(OutputStream outputStream) {
+                        return WorkbookRowWriter.xls(outputStream);
+                    }
+                });
+
+            case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+            case "application/x-tika-ooxml":
+                return Maybe.just(new FormatAdapter() {
+                    @Override
+                    public RowReader createReader(Path path, ReaderOptions options) throws IOException {
+                        return decorateReader(XLSXRowReader.open(path, options), options);
+                    }
+
+                    @Override
+                    public RowReader createReader(InputStream inputStream, ReaderOptions options) throws IOException {
+                        return decorateReader(XLSXRowReader.open(inputStream, options), options);
+                    }
+
+                    @Override
+                    public RowWriter createWriter(OutputStream outputStream) {
+                        return WorkbookRowWriter.xlsx(outputStream);
+                    }
+                });
+
+            default:
+                return Maybe.empty();
+        }
+    }
+
+    /**
+     * Wrap a reader according to the given options.
+     * <p>
+     * When inline headers are enabled the reader is wrapped in an {@link InlineHeaderReader}; otherwise it is
+     * returned unchanged.
+     */
+    private static RowReader decorateReader(RowReader reader, ReaderOptions options) {
+        if (options.isInlineHeaders()) {
+            reader = new InlineHeaderReader(reader);
+        }
+        return reader;
+    }
+}
diff --git a/src/main/java/com/widen/tabitha/reader/Header.java b/src/main/java/com/widen/tabitha/reader/Header.java
index f4bf354..5e4f273 100644
--- a/src/main/java/com/widen/tabitha/reader/Header.java
+++ b/src/main/java/com/widen/tabitha/reader/Header.java
@@ -1,6 +1,12 @@
package com.widen.tabitha.reader;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
/**
* Defines an ordered list of named columns.
@@ -182,5 +188,7 @@ public DuplicateColumnException(String column) {
}
@Override
- public String toString() { return columnsByIndex.toString(); }
+ public String toString() {
+ return columnsByIndex.toString();
+ }
}
diff --git a/src/main/java/com/widen/tabitha/reader/InlineHeaderReader.java b/src/main/java/com/widen/tabitha/reader/InlineHeaderReader.java
index 11d38e7..d846635 100644
--- a/src/main/java/com/widen/tabitha/reader/InlineHeaderReader.java
+++ b/src/main/java/com/widen/tabitha/reader/InlineHeaderReader.java
@@ -8,12 +8,12 @@
/**
* Decorates another reader, interpreting the first row of each page of data as the header for subsequent rows.
*/
-class InlineHeaderReader implements RowReader {
+public class InlineHeaderReader implements RowReader {
private final RowReader inner;
private Header currentHeader;
private long currentPage = -1;
- InlineHeaderReader(RowReader inner) {
+ public InlineHeaderReader(RowReader inner) {
this.inner = inner;
}
diff --git a/src/main/java/com/widen/tabitha/reader/RowReaders.java b/src/main/java/com/widen/tabitha/reader/RowReaders.java
index a0eac09..23ca91b 100644
--- a/src/main/java/com/widen/tabitha/reader/RowReaders.java
+++ b/src/main/java/com/widen/tabitha/reader/RowReaders.java
@@ -1,18 +1,13 @@
package com.widen.tabitha.reader;
-import com.widen.tabitha.formats.delimited.DelimitedFormat;
-import com.widen.tabitha.formats.delimited.DelimitedRowReader;
-import com.widen.tabitha.formats.excel.XLSRowReader;
-import com.widen.tabitha.formats.excel.XLSXRowReader;
+import com.widen.tabitha.formats.FormatRegistry;
+import io.reactivex.Maybe;
import org.apache.tika.Tika;
import java.io.BufferedInputStream;
-import java.io.IOException;
import java.io.InputStream;
-import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
-import java.util.Optional;
/**
* Helper factory methods for creating row readers.
@@ -24,7 +19,7 @@ public class RowReaders {
* @param path The file path of the file to open.
* @return A row reader if the file is in a supported format.
*/
- public static Optional open(String path) throws Exception {
+ public static Maybe open(String path) {
return open(Paths.get(path), null);
}
@@ -34,7 +29,7 @@ public static Optional open(String path) throws Exception {
* @param path The file path of the file to open.
* @return A row reader if the file is in a supported format.
*/
- public static Optional open(Path path) throws Exception {
+ public static Maybe open(Path path) {
return open(path, null);
}
@@ -45,30 +40,11 @@ public static Optional open(Path path) throws Exception {
* @param options Options to pass to the reader.
* @return A row reader if the file is in a supported format.
*/
- public static Optional open(Path path, ReaderOptions options) throws Exception {
- if (options == null) {
- options = new ReaderOptions();
- }
-
- String mimeType = tika.detect(path);
-
- switch (mimeType) {
- case "text/csv":
- case "text/plain":
- return Optional.of(decorate(new DelimitedRowReader(Files.newInputStream(path), DelimitedFormat.CSV), options));
-
- case "text/tab-separated-values":
- return Optional.of(decorate(new DelimitedRowReader(Files.newInputStream(path), DelimitedFormat.TSV), options));
-
- case "application/vnd.ms-excel":
- return Optional.of(decorate(XLSRowReader.open(path, options), options));
-
- case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
- case "application/x-tika-ooxml":
- return Optional.of(decorate(XLSXRowReader.open(path, options), options));
- }
-
- return Optional.empty();
+ public static Maybe open(Path path, ReaderOptions options) {
+ return Maybe
+ .fromCallable(() -> tika.detect(path))
+ .flatMap(FormatRegistry::forMimeType)
+ .map(formatAdapter -> formatAdapter.createReader(path, options != null ? options : new ReaderOptions()));
}
/**
@@ -77,7 +53,7 @@ public static Optional open(Path path, ReaderOptions options) throws
* @param inputStream The input stream to read.
* @return A row reader if the stream is in a supported format.
*/
- public static Optional open(InputStream inputStream) throws IOException {
+ public static Maybe open(InputStream inputStream) {
return open(inputStream, null, null);
}
@@ -88,7 +64,7 @@ public static Optional open(InputStream inputStream) throws IOExcepti
* @param filename The filename associated with the stream, if known.
* @return A row reader if the stream is in a supported format.
*/
- public static Optional open(InputStream inputStream, String filename) throws IOException {
+ public static Maybe open(InputStream inputStream, String filename) {
return open(inputStream, filename, null);
}
@@ -99,7 +75,7 @@ public static Optional open(InputStream inputStream, String filename)
* @param options Options to pass to the reader.
* @return A row reader if the stream is in a supported format.
*/
- public static Optional open(InputStream inputStream, ReaderOptions options) throws IOException {
+ public static Maybe open(InputStream inputStream, ReaderOptions options) {
return open(inputStream, null, options);
}
@@ -111,44 +87,15 @@ public static Optional open(InputStream inputStream, ReaderOptions op
* @param options Options to pass to the reader.
* @return A row reader if the stream is in a supported format.
*/
- public static Optional open(
- InputStream inputStream,
- String filename,
- ReaderOptions options
- ) throws IOException {
- if (options == null) {
- options = new ReaderOptions();
- }
-
+ public static Maybe open(InputStream inputStream, String filename, ReaderOptions options) {
// If our input stream supports marks, Tika will rewind the stream back to the start for us after detecting the
// format, so ensure our input stream supports it.
- inputStream = createRewindableInputStream(inputStream);
- String mimeType = tika.detect(inputStream, filename);
-
- switch (mimeType) {
- case "text/csv":
- case "text/plain":
- return Optional.of(decorate(new DelimitedRowReader(inputStream, DelimitedFormat.CSV), options));
-
- case "text/tab-separated-values":
- return Optional.of(decorate(new DelimitedRowReader(inputStream, DelimitedFormat.TSV), options));
-
- case "application/vnd.ms-excel":
- return Optional.of(decorate(XLSRowReader.open(inputStream, options), options));
-
- case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
- case "application/x-tika-ooxml":
- return Optional.of(decorate(XLSXRowReader.open(inputStream, options), options));
- }
-
- return Optional.empty();
- }
+ InputStream rewindableStream = createRewindableInputStream(inputStream);
- private static RowReader decorate(RowReader reader, ReaderOptions options) {
- if (options.isInlineHeaders()) {
- reader = new InlineHeaderReader(reader);
- }
- return reader;
+ return Maybe
+ .fromCallable(() -> tika.detect(rewindableStream, filename))
+ .flatMap(FormatRegistry::forMimeType)
+ .map(formatAdapter -> formatAdapter.createReader(rewindableStream, options != null ? options : new ReaderOptions()));
}
private static InputStream createRewindableInputStream(InputStream inputStream) {
diff --git a/src/main/java/com/widen/tabitha/writer/RowWriters.java b/src/main/java/com/widen/tabitha/writer/RowWriters.java
index eb58381..cbe8bb1 100644
--- a/src/main/java/com/widen/tabitha/writer/RowWriters.java
+++ b/src/main/java/com/widen/tabitha/writer/RowWriters.java
@@ -1,13 +1,10 @@
package com.widen.tabitha.writer;
-import com.widen.tabitha.formats.delimited.DelimitedFormat;
-import com.widen.tabitha.formats.delimited.DelimitedRowWriter;
-import com.widen.tabitha.formats.excel.WorkbookRowWriter;
-import org.apache.commons.io.FilenameUtils;
+import com.widen.tabitha.formats.FormatRegistry;
+import io.reactivex.Maybe;
+import org.apache.tika.Tika;
-import java.io.IOException;
import java.io.OutputStream;
-import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
@@ -21,7 +18,7 @@ public class RowWriters {
* @param path The path to open.
* @return A row writer for the given path.
*/
- public static RowWriter create(String path) throws IOException {
+ public static Maybe create(String path) {
return create(Paths.get(path));
}
@@ -31,30 +28,26 @@ public static RowWriter create(String path) throws IOException {
* @param path The path to open.
* @return A row writer for the given file.
*/
- public static RowWriter create(Path path) throws IOException {
- return create(Files.newOutputStream(path), path.getFileName().toString());
+    public static Maybe<RowWriter> create(Path path) {
+        // The target file may not exist yet (or may hold stale content), so detect the format from its name only.
+        return Maybe.fromCallable(() -> tika.detect(path.getFileName().toString()))
+            .flatMap(FormatRegistry::forMimeType)
+            .map(formatAdapter -> formatAdapter.createWriter(path));
     }
/**
* Create a new row writer for the given output stream and guess the output format based on a filename.
*
+ * @param outputStream The output stream to write to.
+ * @param name The name of the output file or format.
* @return A row writer for the given output stream.
*/
- public static RowWriter create(OutputStream outputStream, String filename) {
- String extension = FilenameUtils.getExtension(filename);
-
- if ("xlsx".equals(extension)) {
- return WorkbookRowWriter.xlsx(outputStream);
- }
-
- if ("xls".equals(extension)) {
- return WorkbookRowWriter.xls(outputStream);
- }
-
- if ("tsv".equals(extension)) {
- return new DelimitedRowWriter(outputStream, DelimitedFormat.TSV);
- }
-
- return new DelimitedRowWriter(outputStream, DelimitedFormat.CSV);
+ public static Maybe create(OutputStream outputStream, String name) {
+ return FormatRegistry
+ .forMimeType(tika.detect(name))
+ .map(formatAdapter -> formatAdapter.createWriter(outputStream));
}
+
+ // Apache Tika instance for detecting MIME types.
+ private static final Tika tika = new Tika();
}
diff --git a/src/test/groovy/com/widen/tabitha/HeaderTest.groovy b/src/test/groovy/com/widen/tabitha/HeaderTest.groovy
index fcb5cc0..6b9130b 100644
--- a/src/test/groovy/com/widen/tabitha/HeaderTest.groovy
+++ b/src/test/groovy/com/widen/tabitha/HeaderTest.groovy
@@ -1,7 +1,7 @@
package com.widen.tabitha
import com.widen.tabitha.reader.Header
-import spock.lang.*
+import spock.lang.Specification
class HeaderTest extends Specification {
def "build with duplicate column names"() {
@@ -32,10 +32,10 @@ class HeaderTest extends Specification {
def "get columns by index"() {
given:
def header = new Header.Builder()
- .add("foo")
- .add("bar")
- .add("baz")
- .build()
+ .add("foo")
+ .add("bar")
+ .add("baz")
+ .build()
expect:
header.nameOf(0).get() == "foo"
diff --git a/src/test/groovy/com/widen/tabitha/RowReaderTest.groovy b/src/test/groovy/com/widen/tabitha/RowReaderTest.groovy
index 7671888..8d743d8 100644
--- a/src/test/groovy/com/widen/tabitha/RowReaderTest.groovy
+++ b/src/test/groovy/com/widen/tabitha/RowReaderTest.groovy
@@ -2,7 +2,7 @@ package com.widen.tabitha
import com.widen.tabitha.reader.Row
import com.widen.tabitha.reader.RowReader
-import spock.lang.*
+import spock.lang.Specification
class RowReaderTest extends Specification {
def "empty reader"() {
diff --git a/src/test/groovy/com/widen/tabitha/RowReadersTest.groovy b/src/test/groovy/com/widen/tabitha/RowReadersTest.groovy
index f0f2d16..31567e7 100644
--- a/src/test/groovy/com/widen/tabitha/RowReadersTest.groovy
+++ b/src/test/groovy/com/widen/tabitha/RowReadersTest.groovy
@@ -1,7 +1,7 @@
package com.widen.tabitha
import com.widen.tabitha.reader.RowReaders
-import spock.lang.*
+import spock.lang.Specification
class RowReadersTest extends Specification {
def "open a CSV file"() {
@@ -10,7 +10,7 @@ class RowReadersTest extends Specification {
def reader = RowReaders.open(file)
expect:
- reader.isPresent()
+ !reader.isEmpty().blockingGet()
}
def "open a CSV stream"() {
@@ -19,7 +19,7 @@ class RowReadersTest extends Specification {
def reader = RowReaders.open(stream)
expect:
- reader.isPresent()
+ !reader.isEmpty().blockingGet()
}
def "open an XLS file"() {
@@ -28,7 +28,7 @@ class RowReadersTest extends Specification {
def reader = RowReaders.open(file)
expect:
- reader.isPresent()
+ !reader.isEmpty().blockingGet()
}
def "open an XLS stream"() {
@@ -37,7 +37,7 @@ class RowReadersTest extends Specification {
def reader = RowReaders.open(stream)
expect:
- reader.isPresent()
+ !reader.isEmpty().blockingGet()
}
def "open an XLSX file"() {
@@ -46,7 +46,7 @@ class RowReadersTest extends Specification {
def reader = RowReaders.open(file)
expect:
- reader.isPresent()
+ !reader.isEmpty().blockingGet()
}
def "open an XLSX stream"() {
@@ -55,6 +55,6 @@ class RowReadersTest extends Specification {
def reader = RowReaders.open(stream)
expect:
- reader.isPresent()
+ !reader.isEmpty().blockingGet()
}
}
diff --git a/src/test/groovy/com/widen/tabitha/VariantTest.groovy b/src/test/groovy/com/widen/tabitha/VariantTest.groovy
index 54a4c93..1db1432 100644
--- a/src/test/groovy/com/widen/tabitha/VariantTest.groovy
+++ b/src/test/groovy/com/widen/tabitha/VariantTest.groovy
@@ -1,6 +1,6 @@
package com.widen.tabitha
-import spock.lang.*
+import spock.lang.Specification
class VariantTest extends Specification {
def "of factory creates correct variant types"() {
diff --git a/src/test/groovy/com/widen/tabitha/formats/HiddenRowsTest.groovy b/src/test/groovy/com/widen/tabitha/formats/HiddenRowsTest.groovy
index ae61a7b..0725e0d 100644
--- a/src/test/groovy/com/widen/tabitha/formats/HiddenRowsTest.groovy
+++ b/src/test/groovy/com/widen/tabitha/formats/HiddenRowsTest.groovy
@@ -9,9 +9,9 @@ class HiddenRowsTest extends Specification {
def "Hidden rows are ignored"() {
setup:
def reader = RowReaders.open(
- Helpers.getResourceStream(file),
- new ReaderOptions().withIncludeHiddenRows(false)
- ).get()
+ Helpers.getResourceStream(file),
+ new ReaderOptions().withIncludeHiddenRows(false)
+ ).blockingGet()
expect:
reader.each { row ->
@@ -25,9 +25,9 @@ class HiddenRowsTest extends Specification {
def "Hidden rows are not ignored"() {
setup:
def reader = RowReaders.open(
- Helpers.getResourceStream(file),
- new ReaderOptions().withIncludeHiddenRows(true)
- ).get()
+ Helpers.getResourceStream(file),
+ new ReaderOptions().withIncludeHiddenRows(true)
+ ).blockingGet()
when:
def foundHidden = false