Skip to content

Commit

Permalink
v1.2.6
Browse files Browse the repository at this point in the history
  • Loading branch information
rccarrasco committed Dec 11, 2013
1 parent dbd6e6b commit d6be8d8
Show file tree
Hide file tree
Showing 14 changed files with 194 additions and 81 deletions.
26 changes: 26 additions & 0 deletions dependency-reduced-pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,32 @@
</archive>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4</version>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
<configuration>
<finalName>ocrevaluationfull</finalName>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<appendAssemblyId>false</appendAssemblyId>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<mainClass>eu.digitisation.MainGUI</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<version>1.3.1</version>
Expand Down
17 changes: 9 additions & 8 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<groupId>eu.digitisation</groupId>
<artifactId>ocrevalUAtion</artifactId>
<name>ocrevalUAtion</name>
<version>1.2.5</version>
<version>1.2.6</version>
<packaging>jar</packaging>
<description>OCR Evaluation Tool</description>
<organization>
Expand All @@ -29,7 +29,7 @@
<jdk.version>1.7</jdk.version>
</properties>
<build>
<finalName>ocrevaluation</finalName>
<!--finalName>ocrevaluation</finalName-->
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
Expand Down Expand Up @@ -63,10 +63,11 @@
</archive>
</configuration>
</plugin>
<!--plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4</version>
<configuration>
<finalName>ocrevaluation</finalName>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
Expand All @@ -87,8 +88,8 @@
</goals>
</execution>
</executions>
</plugin-->
<plugin>
</plugin>
<!--plugin>
<inherited>true</inherited>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
Expand All @@ -109,8 +110,8 @@
</configuration>
</execution>
</executions>
</plugin>
<plugin>
</plugin-->
<!--plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.1</version>
Expand Down Expand Up @@ -147,7 +148,7 @@
</configuration>
</execution>
</executions>
</plugin>
</plugin-->
</plugins>
</build>
<dependencies>
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/eu/digitisation/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class Main {
static final String helpMsg = "Usage:\t"
+ "ocrevalUAtion -gt file1 [encoding] "
+ "-ocr file2 [encoding] "
+ "-d output_dir [-r replacements_file]";
+ "-d output_dir [-r equivalences_file]";

private static void exit_gracefully() {
System.err.println(helpMsg);
Expand Down
29 changes: 25 additions & 4 deletions src/main/java/eu/digitisation/MainGUI.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,21 @@ public MainGUI() {
repaint();
}

/**
*
* @return true if all required files have been selected
*/
private boolean checkInputFiles() {
boolean ready = true;
Component[] components = pane.getComponents();

for (int n = 0; n < 2; ++n) {
boolean[] required = {true, true, false};

for (int n = 0; n < 3; ++n) {
InputFileSelector ifs = (InputFileSelector) components[n];
if (ifs.accepted()) {
files[n] = ifs.getFile();
}
if (!(ifs.accepted() && files[n].exists())) {
if (required[n] && !(ifs.accepted() && files[n].exists())) {
ifs.shade(Color.decode("#fffacd"));
ifs.repaint();
ready = false;
Expand All @@ -92,6 +97,18 @@ private boolean checkInputFiles() {
return ready;
}

/**
 * Displays a warning message in the first component of the pane.
 * <p>
 * The component at index 0 is treated as an {@code InputFileSelector}:
 * its foreground is set to red, it is shaded with the colour
 * {@code #fffacd}, its text is replaced by the given message and it is
 * then repainted.
 *
 * @param text the text to be displayed
 */
private void warning(String text) {
    final InputFileSelector selector = (InputFileSelector) pane.getComponent(0);
    final Color highlight = Color.decode("#fffacd");

    // Keep this call order: shade() adjusts colours as well.
    selector.setForeground(Color.RED);
    selector.shade(highlight);
    selector.setText(text);
    selector.repaint();
}

@Override
public void actionPerformed(ActionEvent e) {
JButton pressed = (JButton) e.getSource();
Expand Down Expand Up @@ -121,7 +138,11 @@ public void actionPerformed(ActionEvent e) {
System.out.println(uri);
Desktop.getDesktop().browse(uri);
}
} catch (Exception ex) {
} catch (InvalidObjectException ex) {
warning(ex.getMessage());
} catch (URISyntaxException ex) {
Logger.getLogger(MainGUI.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(MainGUI.class.getName()).log(Level.SEVERE, null, ex);
}
}
Expand Down
17 changes: 11 additions & 6 deletions src/main/java/eu/digitisation/distance/Aligner.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public class Aligner {
// style for unaligned segments
final static String uStyle = "background-color:aquamarine";
// style for highlight replacement in parallel text
final static String twinStyle = "";
final static String twinStyle = "";

/**
* @return 3-wise minimum.
Expand Down Expand Up @@ -109,11 +109,14 @@ private static EditTable align(String first, String second) {
* Shows text alignment based on a pseudo-Levenshtein distance where
* white-spaces are not allowed to be replaced with text or vice-versa
*
* @param first
* @param second
* @param header1 first text title for table head
* @param header2 second text title for table head
* @param first the first text
* @param second the second text
* @return a table in XHTML format showing the alignments
*/
public static Element alignmentMap(String first, String second) {
public static Element alignmentMap(String header1, String header2,
String first, String second) {
EditTable B = align(first, second);
DocumentBuilder builder = new DocumentBuilder("table");
Element table = builder.root();
Expand All @@ -134,10 +137,12 @@ public static Element alignmentMap(String first, String second) {
row = builder.addElement("tr");
cell1 = builder.addElement(row, "td");
cell2 = builder.addElement(row, "td");
cell1.setAttribute("width", "50%");
cell2.setAttribute("width", "50%");
cell1.setAttribute("align", "center");
cell2.setAttribute("align", "center");
builder.addTextElement(cell1, "h3", "Reference");
builder.addTextElement(cell2, "h3", "OCR");
builder.addTextElement(cell1, "h3", header1);
builder.addTextElement(cell2, "h3", header2);
row = builder.addElement("tr");
cell1 = builder.addElement(row, "td");
cell2 = builder.addElement(row, "td");
Expand Down
8 changes: 8 additions & 0 deletions src/main/java/eu/digitisation/gui/InputFileSelector.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ public void shade(Color color) {
area.setForeground(Color.DARK_GRAY);
}

/**
 * Replaces the descriptive text shown by this selector.
 * <p>
 * The call is forwarded unchanged to the underlying {@code area}
 * component.
 *
 * @param text the text to be displayed
 */
public void setText(String text) {
    this.area.setText(text);
}

/**
*
* @return the selected input file
Expand Down
8 changes: 6 additions & 2 deletions src/main/java/eu/digitisation/io/Batch.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,25 +38,29 @@ public class Batch {
public Batch(File dir1, File dir2) throws InvalidObjectException {
if (dir1.isDirectory()) {
files1 = dir1.listFiles();
java.util.Arrays.sort(files1);
} else {
String[] a = {};
files1 = new File[1];
files1[0] = dir1;
}
if (dir2.isDirectory()) {
files2 = dir2.listFiles();
java.util.Arrays.sort(files2);
} else {
files2 = new File[1];
files2[0] = dir2;
}
if (files1.length != files2.length) {
throw new java.io.InvalidObjectException(dir1 + " and " + dir2
throw new java.io.InvalidObjectException(dir1.getName()
+ " and " + dir2.getName()
+ " contain a different number of files");
} else {
size = files1.length;
}
if (!consistent()) {
throw new java.io.InvalidObjectException(dir1 + " and " + dir2
throw new java.io.InvalidObjectException(dir1.getName()
+ " and " + dir2.getName()
+ " contain files with inconsistent names");
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/eu/digitisation/io/CharFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public CharFilter(File file) {
BufferedReader reader = new BufferedReader(new FileReader(file));
while (reader.ready()) {
String line = reader.readLine();
String[] tokens = line.split("([,;])");
String[] tokens = line.split("([,;\t])");
if (tokens.length > 1) { // allow comments in line
String key = UnicodeReader.codepointsToString(tokens[0]);
String value = UnicodeReader.codepointsToString(tokens[1]);
Expand Down
1 change: 1 addition & 0 deletions src/main/java/eu/digitisation/io/TextContent.java
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ protected void readTextFile(File file) {
encoding = Encoding.detect(file);
}
System.err.println("Text file " + file + " encoding is " + encoding);

// read content
try {
FileInputStream fis = new FileInputStream(file);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/eu/digitisation/io/UnicodeReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ protected static String codepointsToString(String codes) throws IOException {
for (String token : tokens) {
if (token.length() % 4 != 0) {
throw new IOException(token
+ " is not a alid Unicode hex sequence");
+ " is not a valid Unicode hex sequence");
}
for (int pos = 0; pos + 3 < token.length(); pos += 4) {
String sub = token.substring(pos, pos + 4);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ public double cer() {
+ value(c, EdOp.SUBSTITUTE)
+ value(c, EdOp.DELETE);
}
return (spu + sub + add) / (double) tot * 100;

return (spu + sub + add) / (double) tot;
}
}
9 changes: 6 additions & 3 deletions src/main/java/eu/digitisation/ocrevaluation/Report.java
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ public Report(File gtfile, String gtencoding,
double cerDL = ErrorMeasure.cerDL(gts, ocrs);
double wer = ErrorMeasure.wer(gts, ocrs);
double ber = ErrorMeasure.ber(gts, ocrs);
Element alitab = Aligner.alignmentMap(gts, ocrs);
Element alitab = Aligner.alignmentMap(gtfile.getName(),
ocrfile.getName(), gts, ocrs);
CharStatTable stats = new CharStatTable(gts, ocrs);

// General info
Expand All @@ -121,7 +122,7 @@ public Report(Batch batch, String gtencoding, String ocrencoding,
File eqfile) {
super("html");
init();

CharFilter filter = (eqfile == null) ? null : new CharFilter(eqfile);
CharStatTable stats = new CharStatTable();
Element summaryTab;
Expand All @@ -144,8 +145,10 @@ public Report(Batch batch, String gtencoding, String ocrencoding,
TokenArray ocrarray = factory.newTokenArray(ocrs);
BagOfWords gtbag = new BagOfWords(gts);
BagOfWords ocrbag = new BagOfWords(ocrs);
Element alitab = Aligner.alignmentMap(gts, ocrs);
Element alitab = Aligner.alignmentMap(input.first.getName(),
input.second.getName(), gts, ocrs);
stats.add(gts, ocrs);
addTextElement(body, "div", " ");
addElement(body, alitab);
numwords += gtarray.length();
wdist += ArrayEditDistance.distance(gtarray.tokens(), ocrarray.tokens(),
Expand Down
Loading

0 comments on commit d6be8d8

Please sign in to comment.