Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
costerwi committed Mar 31, 2015
1 parent 1005145 commit 9b0fd28
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 0 deletions.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,26 @@
# zipdoc
Git textconv program to dump a zip file's text contents to stdout

## Install
Store Zipdoc.class somewhere in your home directory, for example ~/bin.

Define the diff filter in ~/.gitconfig :
```
git config --global --replace-all diff.zipdoc.textconv "java -cp ~/bin Zipdoc"
```

Assign diff attributes to paths in .gitattributes: (also assigning the [rezip filter](https://github.com/costerwi/rezip) for efficient storage)
```
# MS Office
*.docx filter=zip diff=zipdoc
*.xlsx filter=zip diff=zipdoc
*.pptx filter=zip diff=zipdoc
# OpenOffice
*.odt filter=zip diff=zipdoc
*.ods filter=zip diff=zipdoc
*.odp filter=zip diff=zipdoc
# Misc
*.mcdx filter=zip diff=zipdoc
*.slx filter=zip diff=zipdoc
```

70 changes: 70 additions & 0 deletions Zipdoc.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import java.io.*;
import java.util.zip.*;

import org.xml.sax.InputSource;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.TransformerException;

public class Zipdoc {
/**
* Read specified zip document file and output text to stdout.
*
* The program takes aa single argument, the name of the file to convert,
* and produces resulting text on stdout.
* {@link https://github.com/costerwi/zipdoc}
*/
public static void main(String argv[]) throws IOException, TransformerException {
if (1 != argv.length) {
System.err.println("Usage: Zipdoc infile >textconv.txt");
System.exit(1);
}
ZipInputStream source_zip = new ZipInputStream(new FileInputStream(argv[0]));

Transformer serializer = SAXTransformerFactory.newInstance().newTransformer();
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");

byte[] buffer = new byte[8192];
ZipEntry entry;
ByteArrayOutputStream uncomp_bs = new ByteArrayOutputStream();
CRC32 cksum = new CRC32();
CheckedOutputStream uncomp_os = new CheckedOutputStream(uncomp_bs, cksum);
try {
while ((entry = source_zip.getNextEntry()) != null) {
uncomp_bs.reset();
cksum.reset();

System.out.println("Subfile:\t" + entry);

// Copy file from source_zip into uncompressed, checksummed output stream
int len = 0;
while ((len = source_zip.read(buffer)) > 0) {
uncomp_os.write(buffer, 0, len);
}
source_zip.closeEntry();

if (entry.getName().endsWith(".xml")) {
// xml file: pretty-print the data to stdout
InputSource in = new InputSource(new ByteArrayInputStream(uncomp_bs.toByteArray()));
serializer.transform(new SAXSource(in), new StreamResult(System.out));
} else if (entry.getName().endsWith(".txt")) {
// Text file: dump directly to stdout
uncomp_bs.writeTo(System.out);
} else {
// Unknown file type: report uncompressed size and CRC32
System.out.println("Filesize:\t" + uncomp_bs.size());
System.out.println("Checksum:\t" + Long.toHexString(cksum.getValue()));
}
System.out.println();
}
} finally {
source_zip.close();
}
}
}

0 comments on commit 9b0fd28

Please sign in to comment.