weblyzard
diff --git a/‎java-examples/keyword-extraction/pom.xml
Lines changed: 30 additions & 17 deletions b/‎java-examples/keyword-extraction/pom.xml
Lines changed: 30 additions & 17 deletions
diff --git a/‎java-examples/keyword-extraction/src/main/java/com/weblyzard/api/example/keyword/KeywordExtractor.java
Lines changed: 140 additions & 142 deletions b/‎java-examples/keyword-extraction/src/main/java/com/weblyzard/api/example/keyword/KeywordExtractor.java
Lines changed: 140 additions & 142 deletions
@@ -25,23 +25,6 @@
 			</extension>
 		</extensions>
 		<plugins>
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-eclipse-plugin</artifactId>
-				<version>2.10</version>
-				<configuration>
-					<additionalConfig>
-						<file>
-							<name>.settings/org.eclipse.jdt.core.prefs</name>
-							<url>https://raw.githubusercontent.com/weblyzard/eclipse-settings/master/settings/org.eclipse.jdt.core.prefs</url>
-						</file>
-						<file>
-							<name>.settings/org.eclipse.jdt.ui.prefs</name>
-							<url>https://raw.githubusercontent.com/weblyzard/eclipse-settings/master/settings/org.eclipse.jdt.ui.prefs</url>
-						</file>
-					</additionalConfig>
-				</configuration>
-			</plugin>
 			<plugin>
 				<groupId>org.apache.maven.plugins</groupId>
 				<artifactId>maven-compiler-plugin</artifactId>
@@ -52,6 +35,36 @@
 				</configuration>
 			</plugin>
 			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-checkstyle-plugin</artifactId>
+				<version>3.0.0</version>
+				<dependencies>
+					<dependency>
+						<groupId>com.puppycrawl.tools</groupId>
+						<artifactId>checkstyle</artifactId>
+						<version>8.14</version>
+					</dependency>
+				</dependencies>
+				<executions>
+					<execution>
+						<id>checkstyle</id>
+						<phase>validate</phase>
+						<configuration>
+							<!-- <configLocation>google_checks.xml</configLocation> -->
+							<configLocation>https://raw.githubusercontent.com/weblyzard/checkstyle-rules/master/java/checkstyle_relaxed.xml</configLocation>
+							<encoding>UTF-8</encoding>
+							<consoleOutput>true</consoleOutput>
+							<linkXRef>false</linkXRef>
+							<failOnViolation>true</failOnViolation>
+							<violationSeverity>warning</violationSeverity>
+						</configuration>
+						<goals>
+							<goal>check</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
 				<groupId>org.sonarsource.scanner.maven</groupId>
 				<artifactId>sonar-maven-plugin</artifactId>
 				<version>3.4.0.905</version>
 
@@ -10,10 +10,8 @@
 import java.util.Set;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
-
 import javax.ws.rs.WebApplicationException;
 import javax.xml.bind.JAXBException;
-
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.SerializationFeature;
 import com.google.devtools.common.options.OptionsParser;
@@ -23,7 +21,6 @@
 import com.weblyzard.api.model.document.Document;
 import com.weblyzard.api.model.document.MirrorDocument;
 import com.weblyzard.api.model.jesaja.KeywordCalculationProfile;
-
 import lombok.extern.slf4j.Slf4j;
 
 @Slf4j
@@ -34,143 +31,144 @@
  *
  */
 public class KeywordExtractor {
-	
-	private static JeremiaClient preProcessingClient;
-	private static JesajaClient keywordExtractionClient;
-	
-	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT);
-	
-	private static KeywordCalculationProfile KEYWORD_PROFILE = new KeywordCalculationProfile()
-			.setValidPosTags(Set.of("NE", "NNS", "NN", "NNP", "P", "ADJ"))
-			.setMinPhraseSignificance(2)
-			.setNumKeywords(15)
-			.setKeywordAlgorithm("com.weblyzard.backend.jesaja.algorithm.keywords.YatesKeywordSignificanceAlgorithm")
-			.setMinTokenCount(5)
-			.setSkipUnderrepresentedKeywords(true);
-	
-	public static void main(String[] argv) throws IOException, JAXBException {
-		OptionsParser parser = OptionsParser.newOptionsParser(KeywordExtractorOption.class);
-		parser.parseAndExitUponError(argv);
-		KeywordExtractorOption options = parser.getOptions(KeywordExtractorOption.class);
-		
-		if (options.printHelp || options.webServiceBaseUrl.isEmpty() || options.profileName.isEmpty()) {
-			printUsage(parser);
-			return;
-		}
-		
-		// setup web services
-		setupWebServices(options);
-		
-		// train the component with the provided reference corpus
-		if (!options.referenceCorpusDirectory.isEmpty()) {
-			List<Document> documents = getDocuments(options.referenceCorpusDirectory);
-			trainJesaja(options.profileName, documents);
-		}
-		
-		// compute keywords
-		if (!options.targetCorpusDirectory.isEmpty()) {
-			List<Document> documents = getDocuments(options.targetCorpusDirectory);
-			try {
-				Map<String, Map<String, Double>> keywords = keywordExtractionClient.getKeywords(options.profileName, documents);
-				System.out.println(OBJECT_MAPPER.writeValueAsString(keywords));
-			} catch (WebApplicationException | JAXBException e) {
-				log.error("Cannot extract keywords: {}", e);
-				System.exit(-1);
-			}
-		}
-		
-	}
-	
-	/**
-	 * Setup and configure the Web services based on the provided {@link KeywordExtractorOption}s
-	 * 
-	 * @param options used for the Web service configuration
-	 */
-	private static void setupWebServices(KeywordExtractorOption options) {
-		WebserviceClientConfig jeremiaConfig = new WebserviceClientConfig().setUrl(options.webServiceBaseUrl)
-				.setUsername(options.webServiceUserName).setPassword(options.webServiceUserPassword);
-		WebserviceClientConfig jesajaConfig = new WebserviceClientConfig().setUrl(options.webServiceBaseUrl)
-				.setUsername(options.webServiceUserName).setPassword(options.webServiceUserPassword);
-		System.out.println(jeremiaConfig);
-		// use standard service ports, if the web service has been deployed locally
-		if (options.webServiceBaseUrl.startsWith("http://localhost") || options.webServiceBaseUrl.startsWith("http://127.0.0.1")) {
-			jeremiaConfig.setServicePrefix(":63001");
-			jesajaConfig.setServicePrefix(":63002");
-		} 
-		
-		preProcessingClient = new JeremiaClient(jeremiaConfig);
-		keywordExtractionClient = new JesajaClient(jesajaConfig);
-		
-		// setup keyword service configuration
-		keywordExtractionClient.setKeywordProfile(options.profileName, KEYWORD_PROFILE);
-		keywordExtractionClient.setMatviewProfile(options.profileName, options.profileName);
-	}
-	
-	/**
-	 * Train the keyword extraction service with the documents provided in the reference
-	 * corpus.
-	 * 
-	 * @param profileName
-	 * @param documents
-	 */
-	private static void trainJesaja(String profileName, List<Document> documents) {
-		try {
-			while (keywordExtractionClient.rotateShard(profileName) == 0) {
-				keywordExtractionClient.addDocuments(profileName, documents);
-			}
-		} catch (WebApplicationException | JAXBException e) {
-			log.error("Cannot train keyword service: {}", e);
-			System.exit(-1);
-		}
-	}
-	
-
-	/**
-	 * Read all documents from the given directory, perform pre-processing and
-	 * convert them into a list of {@link Document} objects.
-	 * 
-	 * @param documentDirectory
-	 * @return
-	 */
-	private static List<Document> getDocuments(String documentDirectory) {
-		try {
-			return getDocuments(Files.list(Paths.get(documentDirectory)));
-		} catch (IOException e) {
-			log.error("Cannot open corpus directory '{}': {}", documentDirectory, e);
-			System.exit(-1);
-		}
-		return null;
-	}
-	
-	/**
-	 * @return a list of Document objects 
-	 */
-	private static List<Document> getDocuments(Stream<Path> documents) {
-		List<MirrorDocument> inputDocuments = documents.map(documentPath  -> 
-			{
-				try {
-					return new MirrorDocument().setId(documentPath.toString()).setBody(new String(Files.readAllBytes(documentPath)));
-				} catch (IOException e) {
-					log.warn("Cannot open input document '{}': {}", documentPath, e);
-					return null;
-				}
-			}
-		).filter(document -> document != null).collect(Collectors.toList());
-		
-		// create the input structure for the pre-processing web service
-		return preProcessingClient.submitDocuments(inputDocuments, "-1");
-	}
-	
-	
-
-	/** 
-	 * Provide usage information for the given {@link OptionsParser}.
-	 * 
-	 * @param parser
-	 */
-	private static void printUsage(OptionsParser parser) {
-		System.out.println("Usage: java -jar example-keyword-extractor OPTIONS");
-		System.out.println(parser.describeOptions(Collections.emptyMap(), OptionsParser.HelpVerbosity.LONG));
-	}
-	
+
+    private static JeremiaClient preProcessingClient;
+    private static JesajaClient keywordExtractionClient;
+
+    private static final ObjectMapper OBJECT_MAPPER =
+            new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT);
+
+    private static KeywordCalculationProfile KEYWORD_PROFILE = new KeywordCalculationProfile()
+            .setValidPosTags(Set.of("NE", "NNS", "NN", "NNP", "P", "ADJ"))
+            .setMinPhraseSignificance(2).setNumKeywords(15)
+            .setKeywordAlgorithm(
+                    "com.weblyzard.backend.jesaja.algorithm.keywords.YatesKeywordSignificanceAlgorithm")
+            .setMinTokenCount(5).setSkipUnderrepresentedKeywords(true);
+
+    /**
+     * Command line entry point.
+     *
+     * <p>Parses the options, configures the web services, trains the keyword service if a
+     * reference corpus directory has been provided, and prints the keywords computed for the
+     * target corpus as indented JSON to standard output.
+     *
+     * @param argv the command line arguments
+     * @throws IOException if the computed keywords cannot be serialized to JSON
+     * @throws JAXBException part of the declared signature; JAXB errors raised while extracting
+     *         keywords are caught and logged within this method
+     */
+    public static void main(String[] argv) throws IOException, JAXBException {
+        OptionsParser parser = OptionsParser.newOptionsParser(KeywordExtractorOption.class);
+        parser.parseAndExitUponError(argv);
+        KeywordExtractorOption options = parser.getOptions(KeywordExtractorOption.class);
+
+        // the service URL and the profile name are mandatory
+        if (options.printHelp || options.webServiceBaseUrl.isEmpty()
+                || options.profileName.isEmpty()) {
+            printUsage(parser);
+            return;
+        }
+
+        // setup web services
+        setupWebServices(options);
+
+        // train the component with the provided reference corpus
+        if (!options.referenceCorpusDirectory.isEmpty()) {
+            List<Document> documents = getDocuments(options.referenceCorpusDirectory);
+            trainJesaja(options.profileName, documents);
+        }
+
+        // compute keywords
+        if (!options.targetCorpusDirectory.isEmpty()) {
+            List<Document> documents = getDocuments(options.targetCorpusDirectory);
+            try {
+                Map<String, Map<String, Double>> keywords =
+                        keywordExtractionClient.getKeywords(options.profileName, documents);
+                System.out.println(OBJECT_MAPPER.writeValueAsString(keywords));
+            } catch (WebApplicationException | JAXBException e) {
+                // pass the exception as trailing argument without a placeholder, so that the
+                // logger records it together with its stack trace
+                log.error("Cannot extract keywords", e);
+                System.exit(-1);
+            }
+        }
+
+    }
+
+    /**
+     * Sets up and configures the Web service clients based on the provided
+     * {@link KeywordExtractorOption}s.
+     *
+     * <p>Creates one client configuration per service from the base URL and the credentials, adds
+     * the service prefixes {@code :63001} and {@code :63002} for local deployments, instantiates
+     * both clients and registers the keyword and matview profiles under
+     * {@code options.profileName}.
+     *
+     * @param options used for the Web service configuration
+     */
+    private static void setupWebServices(KeywordExtractorOption options) {
+        // Boolean.getBoolean(name) looks up a JVM system property called `name`; the option
+        // value itself has to be parsed instead.
+        boolean useCompression = Boolean.parseBoolean(options.useCompression);
+
+        WebserviceClientConfig jeremiaConfig = new WebserviceClientConfig()
+                .setUrl(options.webServiceBaseUrl).setUsername(options.webServiceUserName)
+                .setPassword(options.webServiceUserPassword)
+                .setUseCompression(useCompression);
+        WebserviceClientConfig jesajaConfig = new WebserviceClientConfig()
+                .setUrl(options.webServiceBaseUrl).setUsername(options.webServiceUserName)
+                .setPassword(options.webServiceUserPassword)
+                .setUseCompression(useCompression);
+        // NOTE(review): depending on WebserviceClientConfig#toString this may echo the
+        // password to standard output - confirm.
+        System.out.println(jeremiaConfig);
+        // use standard service ports, if the web service has been deployed locally
+        if (options.webServiceBaseUrl.startsWith("http://localhost")
+                || options.webServiceBaseUrl.startsWith("http://127.0.0.1")) {
+            jeremiaConfig.setServicePrefix(":63001");
+            jesajaConfig.setServicePrefix(":63002");
+        }
+
+        preProcessingClient = new JeremiaClient(jeremiaConfig);
+        keywordExtractionClient = new JesajaClient(jesajaConfig);
+
+        // setup keyword service configuration
+        keywordExtractionClient.setKeywordProfile(options.profileName, KEYWORD_PROFILE);
+        keywordExtractionClient.setMatviewProfile(options.profileName, options.profileName);
+    }
+
+    /**
+     * Train the keyword extraction service with the documents provided in the reference corpus.
+     *
+     * <p>The documents are added again for as long as rotating the shard of the profile returns
+     * {@code 0}. If the service call fails, the error is logged and the JVM exits with status
+     * {@code -1}.
+     *
+     * @param profileName the name of the profile to train
+     * @param documents the {@link Document}s used for training
+     */
+    private static void trainJesaja(String profileName, List<Document> documents) {
+        try {
+            while (keywordExtractionClient.rotateShard(profileName) == 0) {
+                keywordExtractionClient.addDocuments(profileName, documents);
+            }
+        } catch (WebApplicationException | JAXBException e) {
+            // pass the exception as trailing argument without a placeholder, so that the
+            // logger records it together with its stack trace
+            log.error("Cannot train keyword service", e);
+            System.exit(-1);
+        }
+    }
+
+    /**
+     * Read all documents from the given directory, perform pre-processing and convert them into a
+     * list of {@link Document} objects.
+     *
+     * <p>If the directory cannot be listed, the error is logged and the JVM exits with status
+     * {@code -1}.
+     *
+     * @param documentDirectory the directory containing the documents
+     * @return the list of {@link Document} objects read from the directory
+     */
+    private static List<Document> getDocuments(String documentDirectory) {
+        // Files.list keeps the directory open until the returned stream is closed
+        try (Stream<Path> documentPaths = Files.list(Paths.get(documentDirectory))) {
+            return getDocuments(documentPaths);
+        } catch (IOException e) {
+            // the exception is the trailing argument and therefore logged with its stack trace
+            log.error("Cannot open corpus directory '{}'", documentDirectory, e);
+            System.exit(-1);
+        }
+        // required by the compiler, which does not know that System.exit does not return
+        return null;
+    }
+
+    /**
+     * Reads the given files, wraps them into {@link MirrorDocument}s and submits these to the
+     * pre-processing web service.
+     *
+     * <p>Files that cannot be read are logged and skipped. File content is decoded as UTF-8.
+     *
+     * @param documents the paths of the documents to read
+     * @return the {@link Document}s returned by the pre-processing web service
+     */
+    private static List<Document> getDocuments(Stream<Path> documents) {
+        List<MirrorDocument> inputDocuments = documents.map(documentPath -> {
+            try {
+                // decode with an explicit charset rather than the platform default
+                String body = new String(Files.readAllBytes(documentPath),
+                        java.nio.charset.StandardCharsets.UTF_8);
+                return new MirrorDocument().setId(documentPath.toString()).setBody(body);
+            } catch (IOException e) {
+                // the exception is the trailing argument and therefore logged with its stack trace
+                log.warn("Cannot open input document '{}'", documentPath, e);
+                return null;
+            }
+        }).filter(document -> document != null).collect(Collectors.toList());
+
+        // create the input structure for the pre-processing web service
+        return preProcessingClient.submitDocuments(inputDocuments, "-1");
+    }
+
+    /**
+     * Prints a usage line, followed by the long description of all options known to the given
+     * {@link OptionsParser}, to standard output.
+     */
+    private static void printUsage(OptionsParser parser) {
+        System.out.println("Usage: java -jar example-keyword-extractor OPTIONS");
+
+        String optionsHelp =
+                parser.describeOptions(Collections.emptyMap(), OptionsParser.HelpVerbosity.LONG);
+        System.out.println(optionsHelp);
+    }
+
 }